[med-svn] [khmer] 01/02: Imported Upstream version 2.0~rc2+dfsg

Michael Crusoe misterc-guest at moszumanska.debian.org
Fri Jul 31 16:33:14 UTC 2015


This is an automated email from the git hooks/post-receive script.

misterc-guest pushed a commit to branch master
in repository khmer.

commit 4347c2e4b17b1099038eba2b728d34e320c26a6a
Author: Michael R. Crusoe <michael.crusoe at gmail.com>
Date:   Fri Jul 31 07:08:12 2015 -0700

    Imported Upstream version 2.0~rc2+dfsg
---
 .mailmap                                           |  47 ++
 .ycm_extra_conf.py                                 | 105 ----
 CITATION                                           |  20 +
 ChangeLog                                          | 244 ++++++++
 MANIFEST.in                                        |   1 +
 Makefile                                           |  63 +-
 doc/dev/getting-started.rst                        |  25 +
 doc/release-notes/release-1.4.rst                  |   2 +-
 doc/user/install.rst                               |   5 +-
 doc/user/scripts.rst                               |   3 +
 doc/whats-new-2.0.rst                              |  30 +
 jenkins-build.sh                                   |  21 +-
 khmer/__init__.py                                  | 113 +++-
 khmer/_khmer.cc                                    | 671 ++++++++++++++-------
 khmer/_version.py                                  |   4 +-
 khmer/kfile.py                                     |  35 +-
 khmer/khmer_args.py                                |  19 +-
 khmer/thread_utils.py                              |   2 +-
 khmer/utils.py                                     |  12 +-
 lib/Makefile                                       | 220 +++----
 lib/counting.cc                                    |  85 ++-
 lib/counting.hh                                    |  23 +-
 lib/get_version.py                                 |   1 +
 lib/hashbits.cc                                    |  30 +-
 lib/hashbits.hh                                    |   6 +
 lib/hashtable.cc                                   | 132 ++--
 lib/hashtable.hh                                   |  55 +-
 lib/hllcounter.cc                                  |  35 +-
 lib/hllcounter.hh                                  |  11 +-
 lib/ht-diff.cc                                     | 149 -----
 lib/khmer_exception.hh                             |  32 +-
 lib/kmer_hash.cc                                   |  10 +-
 lib/labelhash.cc                                   |  33 +-
 lib/labelhash.hh                                   |  16 +-
 lib/{khmer.pc.in => oxli.pc.in}                    |   4 +-
 lib/perf_metrics.cc                                |  35 --
 lib/perf_metrics.hh                                |  75 ---
 lib/read_aligner.cc                                | 235 ++++++--
 lib/read_aligner.hh                                |  37 +-
 lib/read_parsers.cc                                |  25 +-
 lib/read_parsers.hh                                |  43 +-
 lib/subset.cc                                      |  40 +-
 lib/subset.hh                                      |  10 +-
 lib/test-HashTables.cc                             | 150 -----
 lib/test-Parser.cc                                 | 145 -----
 lib/test-compile.cc                                |   5 +-
 lib/trace_logger.cc                                |  70 ---
 lib/trace_logger.hh                                |  76 ---
 oxli/__init__.py                                   |   6 +-
 oxli/build_graph.py                                |  12 +-
 oxli/functions.py                                  | 129 +++-
 sandbox/Makefile.read_aligner_training             |  26 +
 sandbox/README.rst                                 |   4 +-
 sandbox/build-sparse-graph.py                      |   2 +-
 sandbox/calc-best-assembly.py                      |  17 +-
 sandbox/collect-reads.py                           |   9 +-
 sandbox/collect-variants.py                        |   6 +-
 sandbox/correct-errors.py                          | 219 -------
 .../trim-low-abund.py => sandbox/correct-reads.py  | 246 ++++----
 sandbox/count-kmers-single.py                      | 103 ++++
 sandbox/count-kmers.py                             |  80 +++
 sandbox/error-correct-pass2.py                     |  94 +++
 sandbox/estimate_optimal_hash.py                   |  11 +-
 sandbox/extract-single-partition.py                |   2 +-
 sandbox/optimal_args_hashbits.py                   |   2 +-
 sandbox/readaligner_pairhmm_train.py               | 205 +++++++
 sandbox/saturate-by-median.py                      |   4 +-
 sandbox/sweep-files.py                             |   8 +-
 sandbox/sweep-reads.py                             |   8 +-
 scripts/abundance-dist-single.py                   |  34 +-
 scripts/abundance-dist.py                          |  32 +-
 scripts/count-median.py                            |  36 +-
 scripts/count-overlap.py                           |  20 +-
 scripts/extract-long-sequences.py                  |   9 +-
 scripts/extract-paired-reads.py                    |   4 +-
 scripts/extract-partitions.py                      |   3 +-
 scripts/fastq-to-fasta.py                          |   7 +-
 scripts/filter-abund-single.py                     |  13 +-
 scripts/filter-abund.py                            |  18 +-
 scripts/interleave-reads.py                        |  31 +-
 scripts/load-graph.py                              |  12 +-
 scripts/load-into-counting.py                      |  21 +-
 scripts/make-initial-stoptags.py                   |   4 +-
 scripts/normalize-by-median.py                     | 214 ++++---
 scripts/oxli                                       |  16 +
 scripts/partition-graph.py                         |   4 +-
 scripts/readstats.py                               |  18 +-
 scripts/sample-reads-randomly.py                   |  12 +-
 scripts/split-paired-reads.py                      |  12 +-
 scripts/trim-low-abund.py                          |  30 +-
 {sandbox => scripts}/unique-kmers.py               |  60 +-
 setup.py                                           |   2 +-
 tests/khmer_tst_utils.py                           | 108 ++--
 tests/test-data/empty-file.bz2                     | Bin 0 -> 14 bytes
 tests/test-data/empty-file.gz                      | Bin 0 -> 32 bytes
 tests/test-data/paired-broken4.fq.1                |   4 +
 tests/test-data/paired-broken4.fq.2                |   4 +
 tests/test-data/paired.fq.2                        |   1 -
 tests/test-data/readaligner-default.json           |  50 ++
 tests/test-data/readaligner-k12.json               |  50 ++
 tests/test-data/test-fastq-reads.fa                | 200 ++++++
 tests/test_counting_hash.py                        |  76 ++-
 tests/test_counting_single.py                      |   4 +-
 tests/test_filter.py                               |   2 +-
 tests/test_functions.py                            |   9 +-
 tests/test_hashbits.py                             | 568 ++++++++---------
 tests/test_hll.py                                  |   2 +-
 tests/test_labelhash.py                            |  12 +-
 tests/test_lump.py                                 |   4 +-
 tests/test_normalize_by_median.py                  | 212 +++++--
 tests/test_oxli_functions.py                       |  40 +-
 tests/test_read_aligner.py                         | 502 +++++++++++++--
 tests/test_read_parsers.py                         |  42 +-
 tests/test_sandbox_scripts.py                      |  40 +-
 tests/test_script_arguments.py                     |  59 +-
 tests/test_scripts.py                              | 398 +++++++-----
 tests/test_streaming_io.py                         | 451 ++++++++++++++
 tests/test_subset_graph.py                         |  16 +-
 118 files changed, 5034 insertions(+), 2835 deletions(-)

diff --git a/.mailmap b/.mailmap
new file mode 100644
index 0000000..4e1b7d4
--- /dev/null
+++ b/.mailmap
@@ -0,0 +1,47 @@
+Michael R. Crusoe <mcrusoe at msu.edu>
+Michael R. Crusoe <mcrusoe at msu.edu> <michael.crusoe at gmail.com>
+Michael R. Crusoe <mcrusoe at msu.edu> <mcruseo at msu.edu>
+Michael R. Crusoe <mcrusoe at msu.edu> <mcrusoe at athyra.(none)>
+Camille Scott <camille.scott.w at gmail.com> <cs.welcher at gmail.com>
+Tamer Mansour <drtamermansour at gmail.com>
+Rhys Kidd <rhyskidd at gmail.com>
+Susan Steinman <susan.steinman at gmail.com> <steinman.tutoring at gmail.com>
+Adina Howe <adina at iastate.edu> <howead at msu.edu>
+Elmar Bucher <buchere at ohsu.edu> <elmbeech at zoho.com>
+Luiz Irber <luiz.irber at gmail.com> <luizirber at users.noreply.github.com>
+Luiz Irber <luiz.irber at gmail.com> <irberlui at msu.edu> 
+Qingpeng Zhang <qingpeng at gmail.com> <qingpeng at msu.edu>
+Reed Cartwright <cartwright at asu.edu>
+Reed Cartwright <cartwright at asu.edu> <reed at cartwrig.ht>
+Jacob Fenton <bocajnotnef at gmail.com>
+Michael Wright <wrigh517 at gmail.com> <narrows at 13-67-33.client.wireless.msu.edu>
+Eric McDonald <em at msu.edu> <emcd.msu at gmail.com>
+Jared Simpson <js18 at sanger.ac.uk> <jared.simpson at gmail.com>
+Benjamin Taylor <taylo886 at msu.edu> <taylo886 at cse.msu.edu>
+Kaben Nanlohy <kaben.nanlohy at gmail.com> <kaben at idyll.org>
+Ramakrishnan Srinivasan <ramrs at nyu.edu>
+Rodney Pickett <pickett.rodney at gmail.com>
+Sarah Guermond <sarah.guermond at gmail.com> <sguermond at users.noreply.github.com>
+Sarah Guermond <sarah.guermond at gmail.com> <s.guermond at gmail.com>
+Hussien F. Alameldin <hussien at msu.edu> <hussienfotoh at gmail.com>
+Brian Wyss <wyssbria at msu.edu>
+Heather L. Wiencko <wienckhl at tcd.ie> <timiat at yahoo.com>
+Jiarong Guo <guojiaro at gmail.com>
+Josiah Seaman <josiah at dnaskittle.com> <josiah.seaman at gmail.com>
+Leonor Garcia-Gutierrez <l.garcia-gutierrez at warwick.ac.uk>
+Ryan R. Boyce <boycerya at msu.edu>
+en zyme <en_zyme at outlook.com> <enzyme at bu.edu>
+Scott Sievert <sieve121 at umn.edu>
+Joshua R. Nahum <joshnahum at gmail.com>
+Jonathan Gluck <jdg at cs.umd.edu> <jonathangluck08854 at gmail.com>
+Joshua R. Herr <joshua.r.herr at gmail.com>
+Bede Constantinides <bede.constantinides at manchester.ac.uk> <bedeabc at gmail.com>
+Kevin D. Murray <kevin.murray at anu.edu.au> <spam at kdmurray.id.au>
+James A. Stapleton <jas at msu.edu>
+Scott Fay <scott.a.fay at gmail.com> <scott.fay at invitae.com>
+Iván González <igonzalez at mailaps.org> <iglpdc at gmail.com>
+Sherine Awad <drmahmoud at ucdavis.edu> <sherine.awad at gmail.com>
+Alexander Johan Nederbragt <lex.nederbragt at ibv.uio.no>
+Charles Pepe-Ranney <chuck.peperanney at gmail.com>
+Jeramia Ory <Jeramia.Ory at stlcop.edu> <jeramia.ory at gmail.com>
+<jared.simpson at oicr.on.ca> <js18 at sanger.ac.uk>
diff --git a/.ycm_extra_conf.py b/.ycm_extra_conf.py
deleted file mode 100644
index fcc7939..0000000
--- a/.ycm_extra_conf.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# This file is NOT licensed under the GPLv3, which is the license for the rest
-# of YouCompleteMe.
-#
-# Here's the license text for this file:
-#
-# This is free and unencumbered software released into the public domain.
-#
-# Anyone is free to copy, modify, publish, use, compile, sell, or
-# distribute this software, either in source code form or as a compiled
-# binary, for any purpose, commercial or non-commercial, and by any
-# means.
-#
-# In jurisdictions that recognize copyright laws, the author or authors
-# of this software dedicate any and all copyright interest in the
-# software to the public domain. We make this dedication for the benefit
-# of the public at large and to the detriment of our heirs and
-# successors. We intend this dedication to be an overt act of
-# relinquishment in perpetuity of all present and future rights to this
-# software under copyright law.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-# For more information, please refer to <http://unlicense.org/>
-
-import os
-import ycm_core
-
-SOURCE_EXTENSIONS = ['.cpp', '.cxx', '.cc', '.c', '.m', '.mm']
-
-database = ycm_core.CompilationDatabase(
-    os.path.dirname(os.path.abspath(__file__)))
-
-def MakeRelativePathsInFlagsAbsolute(flags, working_directory):
-    if not working_directory:
-        return list(flags)
-    new_flags = []
-    make_next_absolute = False
-    path_flags = ['-isystem', '-I', '-iquote', '--sysroot=']
-    for flag in flags:
-        new_flag = flag
-
-        if make_next_absolute:
-            make_next_absolute = False
-            if not flag.startswith('/'):
-                new_flag = os.path.join(working_directory, flag)
-
-        for path_flag in path_flags:
-            if flag == path_flag:
-                make_next_absolute = True
-                break
-
-            if flag.startswith(path_flag):
-                path = flag[len(path_flag):]
-                new_flag = path_flag + os.path.join(working_directory, path)
-                break
-
-        if new_flag:
-            new_flags.append(new_flag)
-    return new_flags
-
-
-def IsHeaderFile(filename):
-    extension = os.path.splitext(filename)[1]
-    return extension in ['.h', '.hxx', '.hpp', '.hh']
-
-
-def GetCompilationInfoForFile(filename):
-    # The compilation_commands.json file generated by CMake does not have entries
-    # for header files. So we do our best by asking the db for flags for a
-    # corresponding source file, if any. If one exists, the flags for that file
-    # should be good enough.
-    if IsHeaderFile(filename):
-        basename = os.path.splitext(filename)[0]
-        for extension in SOURCE_EXTENSIONS:
-            replacement_file = basename + extension
-            if os.path.exists(replacement_file):
-                compilation_info = database.GetCompilationInfoForFile(
-                    replacement_file)
-                if compilation_info.compiler_flags_:
-                    return compilation_info
-        return None
-    return database.GetCompilationInfoForFile(filename)
-
-
-def FlagsForFile(filename, **kwargs):
-    # Bear in mind that compilation_info.compiler_flags_ does NOT return a
-    # python list, but a "list-like" StringVec object
-    compilation_info = GetCompilationInfoForFile(filename)
-    if not compilation_info:
-        return None
-
-    final_flags = MakeRelativePathsInFlagsAbsolute(
-        compilation_info.compiler_flags_,
-        compilation_info.compiler_working_dir_)
-
-    return {
-        'flags': final_flags,
-        'do_cache': True
-    }
diff --git a/CITATION b/CITATION
index 3b4b3c6..a1b09f3 100644
--- a/CITATION
+++ b/CITATION
@@ -107,6 +107,26 @@ the digital normalization algorithm, described in:
       url = "http://arxiv.org/abs/1203.4802",
   }
 
+Efficient k-mer error trimming
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The script trim-low-abund.py is described in:
+
+   Crossing the streams: a framework for streaming analysis of short DNA
+   sequencing reads
+   Zhang Q, Awad S, Brown CT
+   https://dx.doi.org/10.7287/peerj.preprints.890v1
+
+.. code-block:: tex
+
+  @unpublished{semistream,
+      author = "Qingpeng Zhang and Sherine Awad and C. Titus Brown",
+      title = "Crossing the streams: a framework for streaming analysis of short DNA sequencing reads",
+      year = "2015",
+      eprint = "PeerJ Preprints 3:e1100",
+      url = "https://dx.doi.org/10.7287/peerj.preprints.890v1"
+  }
+
 K-mer counting
 ^^^^^^^^^^^^^^
 
diff --git a/ChangeLog b/ChangeLog
index 2aeb3db..cd69867 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,247 @@
+2015-07-31  Kevin Murray  <spam at kdmurray.id.au>
+
+   * lib/Makefile,Makefile,lib/*.pc.in,lib/test-compile.cc: Misc Debian-based
+   compatibility changes
+   * lib/get_version.py: Add crunchbang, chmod +x
+
+2015-07-29  Michael R. Crusoe  <crusoe at ucdavis.edu>
+
+   * khmer/_khmer.cc: add more CPyChecker inspired fixes
+   * lib/*.{cc,hh}: clean up includes and forward declarations
+
+2015-07-29  Luiz Irber  <khmer at luizirber.org>
+
+   * Makefile: Adapt Makefile rules for py3 changes.
+   * jenkins-build.sh: Read PYTHON_EXECUTABLE and TEST_ATTR from environment.
+
+2015-07-29  Amanda Charbonneau  <charbo24 at msu.edu>
+
+   * scripts/fastq-to-fasta.py: Changed '-n' default description to match
+   behaviour
+
+2015-07-29  Luiz Irber  <khmer at luizirber.org>
+
+   * tests/test_{scripts,streaming_io}.py: Fix the build + add a test
+
+2015-07-28  Titus Brown  <titus at idyll.org>
+
+   * tests/test_streaming_io.py: new shell cmd tests for streaming/piping.
+   * tests/khmer_tst_utils.py: refactor/replace runtestredirect(...) with
+   scriptpath(...) and run_shell_cmd(...).
+   * tests/test_scripts.py: remove test_interleave_reads_broken_fq_4 for
+   only one input file for interleave-reads.py; replace runscriptredirect call
+   with run_shell_cmd.
+   * scripts/interleave-reads.py: force exactly two input files.
+   * scripts/split-paired-reads.py: fix print statement; clarify output.
+   * scripts/{normalize-by-median.py,sample-reads-randomly.py,
+   trim-low-abund.py}: if stdin is supplied for input, check that -o
+   specifies output file.
+   * scripts/filter-abund.py: if stdin is supplied for input, check that -o
+   specifies output file; switched -o to use argparse.FileType.
+   * scripts/extract-long-sequences.py: switched -o to use argparse.FileType.
+   * scripts/{abundance-dist,count-median}.py: added '-' handling for output.
+   * khmer/kfile.py: change 'check_input_files' to no longer warn that
+   '-' doesn't exist.
+   * tests/test-data/paired.fq.2: removed extraneous newline from end.
+   * tests/{test_normalize_by_median,test_script_arguments,test_scripts}.py:
+   added tests for new code.
+   * scripts/oxli: added script for running tests in development directory.
+   * khmer/{__init__,khmer_args}.py,tests/{test_normalize_by_median,
+   test_script_arguments}.py: refactored out use of AssertionError by not
+   throwing plain Exceptions when a ValueError or RuntimeError would do.
+   * oxli/__init__.py: give default help instead of an error when `oxli` is
+   called with no arguments.
+   * tests/test_{normalize_by_median,sandbox_scripts,scripts,streaming_io}.py:
+   always check status code if calling `runscripts` with `fail_ok=True`.
+
+2015-07-28  Luiz Irber  <khmer at luizirber.org>
+
+   * sandbox/unique-kmers.py: moved to scripts.
+   * scripts/unique-kmers.py: fix import bug and initialize to_print earlier.
+   * tests/test_scripts.py: add tests for unique-kmers.py.
+   * doc/user/scripts.rst: added unique-kmers.py to script page
+
+2015-07-28  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * scripts/abundance-dist.py: disallowed forcing on the input file check for
+   the counting table file
+
+2015-07-28  Michael R. Crusoe  <crusoe at ucdavis.edu>
+
+   * .mailmap, Makefile: generate a list of authors
+
+2015-07-28  Kevin Murray  <spam at kdmurray.id.au>
+            Titus Brown  <titus at idyll.org>
+
+   * khmer/utils.py: added fix for SRA-style FASTQ output.
+   * tests/test_scripts.py: tested against a broken version of SRA format.
+   * tests/test-data/paired-broken4.fq.{1,2}: added test files.
+
+2015-07-28  Michael R. Crusoe  <crusoe at ucdavis.edu>
+            Titus Brown  <titus at idyll.org>
+
+   * lib/read_aligner.{cc,hh},tests/{test_read_aligner.py,
+   test-data/readaligner-{default,k12}.json},khmer/__init__.py: refactor,
+   read aligner parameters are now configurable & save/load-able. Can do
+   whole-genome variant finding.
+   * khmer/_khmer.cc,tests/test_read_aligner.py: ReadAligner.align_forward
+   method added
+   * sandbox/correct-errors.py -> sandbox/correct-reads.py: total rewrite
+   * sandbox/error-correct-pass2.py: new script
+   * sandbox/readaligner_pairhmm_train.py: new script
+   * tests/test_sandbox_scripts.py, doc/release-notes/release-1.4.rst:
+   spelling fixes, import re-arrangement
+   * sandbox/{Makefile.read_aligner_training,readaligner_pairhmm_train.py}:
+   Added script to train the aligner
+
+2015-07-27  Titus Brown  <titus at idyll.org>
+
+   * khmer/khmer_args.py,CITATION: added entry for PeerJ paper on
+   semi-streaming to citations.
+   * scripts/{abundance-dist-single.py,abundance-dist.py,count-median.py,
+   count-overlap.py,filter-abund-single.py,load-into-counting.py}: changed
+   default behavior to output data in CSV format and report total k-mers.
+   * tests/test_scripts.py: updated/removed tests for CSV.
+   * doc/whats-new-2.0.rst: added information about change in columnar output,
+   along with other minor corrections.
+   * scripts/normalize-by-median.py: corrected epilog.
+   * khmer/thread_utils.py,
+   sandbox/{calc-best-assembly.py,extract-single-partition.py},
+   scripts/{count-median.py,extract-long-sequences.py,extract-paired-reads.py,
+   extract-partitions.py,fastq-to-fasta.py,
+   interleave-reads.py,normalize-by-median.py,readstats.py,
+   sample-reads-randomly.py,split-paired-reads.py,trim-low-abund.py},
+   tests/{test_normalize_by_median.py,test_scripts.py}: remove explicit
+   'parse_description' from screed open calls.
+   * khmer/_khmer.cc,lib/Makefile,lib/hashtable.{cc,hh},setup.py: removed
+   WITH_INTERNAL_METRICS and trace_logger/perf_metrics references.
+   * lib/perf_metrics.{cc,hh},lib/trace_logger.{cc,hh}: removed unused files.
+
+2015-07-24  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * doc/dev/getting-started.rst: added instructions for second contribution
+
+2015-07-22  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * tests/test_read_parsers.py: added workaround for bug in OSX Python
+   * Makefile: respect that workaround when running the tests
+
+2015-07-21  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * khmer/{kfile,khmer_args}.py: refactored information passing, made it so
+   space checks happen in the right directory.
+   * oxli/build_graph.py,sandbox/collect-reads.py,scripts/{
+   abundance-dist-single,filter-abund-single,load-into-counting,
+   normalize-by-median,trim-low-abund}.py,tests/test_script_arguments.py:
+   changed to use new arg structure for checking hashtable save space.
+   * oxli/functions.py,scripts/saturate-by-median.py: updated error message
+   to mention --force option.
+   * scripts/{count-overlap,load-into-counting,make-initial-stoptags,
+   partition-graph,sample-reads-randomly}.py: removed unnecessary call to
+   check_space.
+
+2015-07-20  Titus Brown  <titus at idyll.org>
+
+   * khmer/__init__.py: cleaned up FP rate reporting.
+   * scripts/normalize-by-median.py: corrected epilog; refactored reporting
+   to be a bit cleaner; use CSV for reporting file;
+   added --report-frequency arg.
+   * tests/test_normalize_by_median.py: updated/added tests for reporting.
+
+2015-07-17  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * oxli/{functions,build_graph}.py,scripts/{load-graph,normalize-by-median,
+   abundance-dist}.py,tests/test_{normalize_by_median,subset_graph,hashbits,
+   oxli_functions}.py: pylint cleanup.
+
+2015-07-17  Michael R. Crusoe  <crusoe at ucdavis.edu>  
+
+   * Makefile, tests/test_read_aligner.py: import khmer when pylinting.
+
+2015-07-17  Michael R. Crusoe  <crusoe at ucdavis.edu>
+
+   * lib/read_parsers.{cc,hh}: use std::string everywhere to match existing
+   exceptions.
+
+2015-07-10  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * khmer/kfile.py: changed check_valid_file_exists to recognize fifos as
+   non-empty.
+   * tests/test_normalize_by_median.py: added test.
+
+2015-07-10  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * oxli/functions.py: changed estimate functions to use correct letter
+   abbreviations.
+   * sandbox/estimate_optimal_hash.py: changed to use renamed estimate
+   functions.
+   * sandbox/unique-kmers.py: changed to not output recommended HT args by
+   default.
+   * tests/test_oxli_functions.py: changed to use renamed estimate functions.
+
+2015-07-10  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * oxli/functions.py: added '--force' check to sanity check.
+
+2015-07-10  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * oxli/functions.py: moved optimization/sanity check func to oxli.
+   * scripts/normalize-by-median.py,oxli/build_graph.py: added
+   optimization/sanity checking via oxli estimation funcs.
+   * tests/test_normalize_by_median.py: updated tests to cover estimation
+   functions.
+
+2015-07-08  Luiz Irber  <khmer at luizirber.org>
+
+   * lib/{counting,hashbits,hashtable,labelhash,subset}.cc: print hexadecimal
+   representation of the signature read from the file.
+
+2015-07-06  Luiz Irber  <khmer at luizirber.org>
+
+   * sandbox/collect-reads.py: Set a default value for coverage based
+   on the docstring.
+   * sandbox/count-kmers-single.py, tests/test_{functions,script_arguments}.py:
+   Replace xrange and cStringIO (not Python 3 compatible).
+   * lib/*.{hh,cc}, oxli/functions.py, tests/*.py: make format.
+
+2015-07-05  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * doc/whats-new-2.0.rst: added in normalize-by-median.py broken paired 
+   updates.
+
+2015-07-05  Michael R. Crusoe  <crusoe at ucdavis.edu>
+
+   * Makefile: fix cppcheck invocation.
+   * khmer/_khmer.cc: switch to prefix increment for non-primitive objects,
+   use a C++ cast, adjust scope.
+   * lib/hashtable.{hh,cc}: make copy constructor no-op explicit. adjust scope
+   * lib/{ht-diff,test-HashTables,test-Parser}.cc: remove unused test code.
+   * lib/labelhash.cc,hllcounter.cc: astyle reformatting.
+   * lib/read_parsers.hh: more explicit constructors.
+
+2015-07-05  Michael R. Crusoe  <crusoe at ucdavis.edu>
+
+   * sandbox/{collect-variants,optimal_args_hashbits,sweep-files}.py:
+   update API usage.
+
+2015-07-05  Titus Brown  <titus at idyll.org>
+
+   * sandbox/{count-kmers.py,count-kmers-single.py}: added scripts to output
+   k-mer counts.
+   * tests/test_sandbox_scripts.py: added tests for count-kmers.py and
+   count-kmers-single.py.
+   * sandbox/README.rst: added count-kmers.py and count-kmers-single.py to
+   sandbox/README.
+
+2015-07-05  Kevin Murray  <spam at kdmurray.id.au>
+
+   * lib/*.{cc,hh},sandbox/*.py,khmer/_khmer.cc,tests/test_*.py: Simplify
+   exception hierarchy, and ensure all C++ exceptions are converted to python
+   errors.
+   * scripts/normalize-by-median.py: Clarify error message.
+   * tests/khmer_tst_utils.py: Add longify function, converts int => long on
+   py2, and passes thru list unmodified on py3.
+
 2015-06-30  Jacob Fenton  <bocajnotnef at gmail.com>
 
    * tests/{test_script_arguments,test_functions}.py: changed tests to use
diff --git a/MANIFEST.in b/MANIFEST.in
index 2d49b98..31c1bf4 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -10,6 +10,7 @@ include third-party/zlib/zconf.h.in third-party/zlib/zlib.pc.in
 exclude third-party/zlib/Makefile third-party/zlib/zconf.h
 recursive-include scripts filter-abund.xml normalize-by-median.xml README.txt
 graft tests
+include scripts/oxli
 global-exclude *.orig
 global-exclude *.pyc
 
diff --git a/Makefile b/Makefile
index 616efdc..9a1d378 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@
 #  and documentation
 # make coverage-report to check coverage of the python scripts by the tests
 
-CPPSOURCES=$(wildcard lib/*.cc lib/*.hh khmer/_khmermodule.cc)
+CPPSOURCES=$(wildcard lib/*.cc lib/*.hh khmer/_khmer.cc)
 PYSOURCES=$(wildcard khmer/*.py scripts/*.py)
 SOURCES=$(PYSOURCES) $(CPPSOURCES) setup.py
 DEVPKGS=pep8==1.5.7 diff_cover autopep8 pylint coverage gcovr nose pep257 \
@@ -12,19 +12,27 @@ DEVPKGS=pep8==1.5.7 diff_cover autopep8 pylint coverage gcovr nose pep257 \
 
 GCOVRURL=git+https://github.com/nschum/gcovr.git@never-executed-branches
 VERSION=$(shell git describe --tags --dirty | sed s/v//)
-CPPCHECK=ls lib/*.cc khmer/_khmermodule.cc | grep -v test | cppcheck -DNDEBUG \
-	 -DVERSION=0.0.cppcheck -UNO_UNIQUE_RC --enable=all \
-	 --file-list=- --platform=unix64 --std=c++03 --inline-suppr \
+CPPCHECK=ls lib/*.cc khmer/_khmer.cc | grep -v test | cppcheck -DNDEBUG \
+	 -DVERSION=0.0.cppcheck -DSEQAN_HAS_BZIP2=1 -DSEQAN_HAS_ZLIB=1 \
+	 -UNO_UNIQUE_RC --enable=all --suppress='*:/usr/*' \
+	 --file-list=- --platform=unix64 --std=c++11 --inline-suppr \
 	 --quiet -Ilib -Ithird-party/bzip2 -Ithird-party/zlib \
-	 -Ithird-party/smhasher
+	 -Ithird-party/smhasher -I/usr/include/python3.4m -DHAVE_SSIZE_T \
+	 -D__linux__ -D__x86_64__ -D__LP64__ -I/usr/include \
+	 -I/usr/include/x86_64-linux-gnu/ -I/usr/include/linux \
+	 -I/usr/lib/gcc/x86_64-linux-gnu/4.9/include/
 
 UNAME := $(shell uname)
 ifeq ($(UNAME),Linux)
-	TESTATTR='!known_failing,!jenkins,!huge'
+	TESTATTR ?= '!known_failing,!jenkins,!huge'
 else
-	TESTATTR='!known_failing,!jenkins,!huge'
+	TESTATTR ?= '!known_failing,!jenkins,!huge,!linux'
 endif
 
+
+MODEXT=$(shell python -c "import sysconfig;print(sysconfig.get_config_var('SO'))")
+EXTENSION_MODULE = khmer/_khmer$(MODEXT)
+
 ## all         : default task; compile C++ code, build shared object library
 all: sharedobj
 
@@ -40,9 +48,9 @@ install-dependencies:
 	pip install --upgrade --requirement doc/requirements.txt
 
 ## sharedobj   : build khmer shared object file
-sharedobj: khmer/_khmermodule.so
+sharedobj: $(EXTENSION_MODULE)
 
-khmer/_khmermodule.so: $(CPPSOURCES)
+$(EXTENSION_MODULE): $(CPPSOURCES)
 	./setup.py build_ext --inplace
 
 coverage-debug: $(CPPSOURCES)
@@ -64,7 +72,7 @@ dist/khmer-$(VERSION).tar.gz: $(SOURCES)
 clean: FORCE
 	cd lib && ${MAKE} clean || true
 	cd tests && rm -rf khmertest_* || true
-	rm -f khmer/_khmermodule.so
+	rm -f $(EXTENSION_MODULE)
 	rm -f khmer/*.pyc lib/*.pyc
 	./setup.py clean --all || true
 	rm -f coverage-debug
@@ -141,6 +149,7 @@ format: astyle autopep8
 ## pylint      : run static code analysis on Python code
 pylint: $(PYSOURCES) $(wildcard tests/*.py)
 	pylint --msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}" \
+                --extension-pkg-whitelist=khmer \
 		setup.py khmer/[!_]*.py khmer/__init__.py scripts/*.py tests \
 		oxli/*.py || true
 
@@ -155,9 +164,9 @@ diff_pylint_report: pylint_report.txt
 # We need to get coverage to look at our scripts. Since they aren't in a
 # python module we can't tell nosetests to look for them (via an import
 # statement). So we run nose inside of coverage.
-.coverage: $(PYSOURCES) $(wildcard tests/*.py) khmer/_khmermodule.so
+.coverage: $(PYSOURCES) $(wildcard tests/*.py) $(EXTENSION_MODULE)
 	coverage run --branch --source=scripts,khmer,oxli --omit=khmer/_version.py \
-		-m nose --with-xunit --attr=\!known_failing --processes=0
+		-m nose --with-xunit --attr $(TESTATTR) --processes=0
 
 coverage.xml: .coverage
 	coverage xml
@@ -173,7 +182,8 @@ coverage-report: .coverage
 
 coverage-gcovr.xml: coverage-debug .coverage
 	gcovr --root=. --branches --output=coverage-gcovr.xml --xml \
-          --gcov-exclude='.*zlib.*|.*bzip2.*|.*smhasher.*|.*seqan.*'
+          --gcov-exclude='.*zlib.*|.*bzip2.*|.*smhasher.*|.*seqan.*' \
+	  --exclude-unreachable-branches
 
 diff-cover: coverage-gcovr.xml coverage.xml
 	diff-cover coverage-gcovr.xml coverage.xml
@@ -207,13 +217,13 @@ libtest: FORCE
 	 $(MAKE) all && \
 	 $(MAKE) install PREFIX=../install_target
 	test -d install_target/include
-	test -f install_target/include/khmer.hh
+	test -f install_target/include/oxli/khmer.hh
 	test -d install_target/lib
-	test -f install_target/lib/libkhmer.a
+	test -f install_target/lib/liboxli.a
 	$(CXX) -o install_target/test-prog-static -I install_target/include \
-		lib/test-compile.cc install_target/lib/libkhmer.a
+		lib/test-compile.cc install_target/lib/liboxli.a
 	$(CXX) -o install_target/test-prog-dynamic -I install_target/include \
-		-L install_target/lib lib/test-compile.cc -lkhmer
+		-L install_target/lib lib/test-compile.cc -loxli
 	rm -rf install_target
 
 ## test        : run the khmer test suite
@@ -276,4 +286,23 @@ convert-release-notes:
 		pandoc --from=markdown --to=rst $${file} > $${file%%.md}.rst; \
 		done
 
+list-authors:
+	@echo '\author[1]{Michael R. Crusoe}'
+	@git log --format='\author[]{%aN}' | sort -uk2 | \
+		grep -v 'root\|crusoe\|titus'
+	@echo '\author[]{C. Titus Brown}'
+	@echo '\affil[1]{mcrusoe at msu.edu}'
+	@git log --format='\author[]{%aN} \affil[]{%aE}' | sort -uk2 | \
+		awk -F\\ '{print "\\"$$3}' | grep -v \
+		'root\|crusoe\|titus\|waffle\|boyce\|pickett.rodney'
+	# R. Boyce requested to be removed 2015/05/21
+	# via pers correspondence to MRC
+	# P Rodney requested to be removed 2015/06/22 via pers correspondence
+	# to MRC
+	@echo '\affil[]{titus at idyll.org}'
+
+list-author-emails:
+	@echo 'name, E-Mail Address'
+	@git log --format='%aN,%aE' | sort -u | grep -v 'root\|waffle\|boyce'
+
 FORCE:
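
For reference, the new MODEXT line above asks the active interpreter for
its C-extension filename suffix, so the shared-object target name works
under both Python 2 and Python 3. A quick way to see the value it
computes (output varies by platform and Python version; the 'SO' config
variable was later renamed 'EXT_SUFFIX'):

    import sysconfig

    # The shared-object suffix used for C extensions, e.g. '.so' on
    # Python 2.7 or '.cpython-34m.so' on Python 3.4.
    print(sysconfig.get_config_var('SO'))
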
diff --git a/doc/dev/getting-started.rst b/doc/dev/getting-started.rst
index a07c743..8434c0e 100644
--- a/doc/dev/getting-started.rst
+++ b/doc/dev/getting-started.rst
@@ -7,6 +7,8 @@
 Getting started with khmer development
 ======================================
 
+.. contents::
+
 This document is for people who would like to contribute to khmer.  It
 walks first-time contributors through making their own copy of khmer,
 building it, and submitting changes for review and merge into the master
@@ -332,3 +334,26 @@ Here are a few suggestions:
 
 * You can also help other people out by watching for new issues or
   looking at pull requests.  Remember to be nice and polite!
+
+Your second contribution...
+---------------------------
+
+Here are a few pointers on getting started on your second (or third,
+or fourth, or nth contribution).
+
+Assuming you've found an issue you'd like to work on, there are a
+couple of things to do to make sure your local copy of the repository
+is ready for a new issue--specifically, we need to make sure it's in
+sync with the remote repository so you aren't working on an old copy::
+
+        git checkout master
+        git fetch --all
+        git pull
+
+This puts you on your local master branch, fetches any changes that
+have been made on GitHub since your last contribution (usually
+including the merge of your last contribution), and merges those
+changes into your local copy of the master branch.
+
+Now, you can go back to `Claiming an issue and starting to develop`_.
diff --git a/doc/release-notes/release-1.4.rst b/doc/release-notes/release-1.4.rst
index c257299..b9abf6c 100644
--- a/doc/release-notes/release-1.4.rst
+++ b/doc/release-notes/release-1.4.rst
@@ -83,7 +83,7 @@ correctly #781 @drtamermansour
 ``split-paired-reads.py``: added ``-o`` option to allow specification of
 an output directory #752 @bede
 
-Fixed a string formatting and a boundry error in
+Fixed a string formatting and a boundary error in
 ``sample-reads-randomly.py`` #773 @qingpeng #995 @ctb
 
 CSV output added to ``abundance-dist.py``, ``abundance-dist-single.py``,
diff --git a/doc/user/install.rst b/doc/user/install.rst
index e43709c..c3b49db 100644
--- a/doc/user/install.rst
+++ b/doc/user/install.rst
@@ -4,9 +4,8 @@
 Installing and running khmer
 ============================
 
-You'll need a 64-bit operating system, Python 2.7.x and internet access.
-
-The khmer project currently works with Python 2.6 but we target Python 2.7.x.
+You'll need a 64-bit operating system, internet access, and Python
+2.7.x OR Python 3.3 or greater.
 
 Build requirements
 ------------------
diff --git a/doc/user/scripts.rst b/doc/user/scripts.rst
index 3bc0cbb..8f2b327 100644
--- a/doc/user/scripts.rst
+++ b/doc/user/scripts.rst
@@ -58,6 +58,9 @@ k-mer counting and abundance filtering
 .. autoprogram:: count-overlap:get_parser()
         :prog: count-overlap.py
 
+.. autoprogram:: unique-kmers:get_parser()
+        :prog: unique-kmers.py
+
 .. _scripts-partitioning:
 
 Partitioning
diff --git a/doc/whats-new-2.0.rst b/doc/whats-new-2.0.rst
index 2a7f0c7..dc9cc69 100644
--- a/doc/whats-new-2.0.rst
+++ b/doc/whats-new-2.0.rst
@@ -3,6 +3,27 @@
 What's New In khmer 2.0?
 ########################
 
+New behavior
+============
+
+Digital normalization script now supports mixed paired and unpaired read input
+------------------------------------------------------------------------------
+
+`normalize-by-median.py` now supports mixed paired and unpaired (or
+"broken-paired") input. Behavior can be forced to either treat all
+reads as singletons or to require all reads be properly paired using
+:option:`--force-single` or :option:`--paired`, respectively. If
+:option:`--paired` is set, :option:`--unpaired-reads` can be used to
+include a file of unpaired reads. The unpaired reads will be examined
+after all of the other sequence files.
+
+Reservoir sampling script extracts paired reads by default
+----------------------------------------------------------
+
+`sample-reads-randomly.py` now retains pairs in the output, by
+default.  This can be overridden to match previous behavior
+with :option:`--force_single`.
+
 Incompatible changes
 ====================
 
@@ -27,3 +48,12 @@ this project.
 
 Files of the above types made in previous versions of khmer are not compatible
 with v2.0; the reverse is also true.
+
+Scripts now output columnar data in CSV format by default
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+All scripts that output any kind of columnar data now do so in CSV format,
+with headers.  Previously this had to be enabled with :option:`--csv`.
+(Affects `abundance-dist-single.py`, `abundance-dist.py`, `count-median.py`,
+and `count-overlap.py`.) `normalize-by-median.py` also now outputs CSV
+when :option:`-R` is used.
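
As a concrete illustration of the broken-paired handling described
above, a hypothetical invocation (file names here are invented; the
flags are the ones documented in this section) might look like:

    normalize-by-median.py --paired --unpaired-reads orphans.fq \
        interleaved-pairs.fq
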
diff --git a/jenkins-build.sh b/jenkins-build.sh
index 94f7d78..54042e9 100755
--- a/jenkins-build.sh
+++ b/jenkins-build.sh
@@ -4,11 +4,13 @@ make clean
 
 rm -Rf .env dist cov-int
 
-if type python2> /dev/null 2>&1
-then
-    PYTHON_EXECUTABLE=$(which python2)
-else
-    PYTHON_EXECUTABLE=$(which python)
+if [ -z "${PYTHON_EXECUTABLE}" ]; then
+    if type python2> /dev/null 2>&1
+    then
+        PYTHON_EXECUTABLE=$(which python2)
+    else
+        PYTHON_EXECUTABLE=$(which python)
+    fi
 fi
 virtualenv -p ${PYTHON_EXECUTABLE} .env
 
@@ -32,7 +34,7 @@ then
 	export CFLAGS="-pg -fprofile-arcs -ftest-coverage"
 	python setup.py build_ext --build-temp $PWD --debug --inplace \
 		--libraries gcov develop
-	make coverage-gcovr.xml coverage.xml
+	make coverage-gcovr.xml coverage.xml TESTATTR='!known_failing,!huge'
 	./setup.py install
 else
 	echo "gcov was not found (or we are on OSX), skipping coverage check"
@@ -70,5 +72,10 @@ fi
 # takes too long to run on every build
 #bash -ex -c 'cd examples/stamps/; ./do.sh' || { echo examples/stamps/do.sh no longer runs; /bin/false; }
 
-make lib
+unset CFLAGS
+unset LDFLAGS
+unset CPPFLAGS
+unset CXXFLAGS
+
+# Don't do lib too, as we already compile as part of libtest
 make libtest
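
Because the script now reads PYTHON_EXECUTABLE from the environment
(per the ChangeLog entry earlier in this patch), a specific interpreter
can be forced at invocation time; for example:

    PYTHON_EXECUTABLE=/usr/bin/python3 bash jenkins-build.sh
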
diff --git a/khmer/__init__.py b/khmer/__init__.py
index 032ca7b..b0b48e9 100644
--- a/khmer/__init__.py
+++ b/khmer/__init__.py
@@ -7,12 +7,14 @@
 """This is khmer; please see http://khmer.readthedocs.org/."""
 
 from __future__ import print_function
+from math import log
+import json
 
 from khmer._khmer import CountingHash as _CountingHash
 from khmer._khmer import LabelHash as _LabelHash
 from khmer._khmer import Hashbits as _Hashbits
 from khmer._khmer import HLLCounter as _HLLCounter
-from khmer._khmer import ReadAligner
+from khmer._khmer import ReadAligner as _ReadAligner
 
 from khmer._khmer import forward_hash  # figuregen/*.py
 # tests/test_{functions,counting_hash,labelhash,counting_single}.py
@@ -147,7 +149,8 @@ def extract_countinghash_info(filename):
 
 def calc_expected_collisions(hashtable, force=False, max_false_pos=.2):
     """Do a quick & dirty expected collision rate calculation on a hashtable.
-    Check to see that collision rate is within threshold.
+
+    Also check to see that collision rate is within threshold.
 
     Keyword argument:
     hashtable: the hashtable object to inspect
@@ -171,8 +174,8 @@ def calc_expected_collisions(hashtable, force=False, max_false_pos=.2):
         print("** Do not use these results!!", file=sys.stderr)
         print("**", file=sys.stderr)
         print("** (estimated false positive rate of %.3f;" % fp_all,
-              file=sys.stderr)
-        print("max allowable %.3f" % max_false_pos, file=sys.stderr)
+              file=sys.stderr, end=' ')
+        print("max recommended %.3f)" % max_false_pos, file=sys.stderr)
         print("**", file=sys.stderr)
 
         if not force:
@@ -218,8 +221,8 @@ def get_n_primes_near_x(number, target):
         i -= 2
 
     if len(primes) != number:
-        raise Exception("unable to find %d prime numbers < %d" % (number,
-                                                                  target))
+        raise RuntimeError("unable to find %d prime numbers < %d" % (number,
+                                                                     target))
 
     return primes
 
@@ -287,3 +290,101 @@ class HLLCounter(_HLLCounter):
 
     def __len__(self):
         return self.estimate_cardinality()
+
+
+class ReadAligner(_ReadAligner):
+
+    """Sequence to graph aligner.
+
+    ReadAligner uses a CountingHash (the counts of k-mers in the target DNA
+    sequences) as an implicit De Bruijn graph. Input DNA sequences are aligned
+    to this graph via a paired Hidden Markov Model.
+
+    The HMM is configured upon class instantiation; default parameters for the
+    HMM are provided in 'defaultTransitionProbabilities' and
+    'defaultScoringMatrix'.
+
+    The main method is 'align'.
+    """
+
+    defaultTransitionProbabilities = (  # _M, _Ir, _Ig, _Mu, _Iru, _Igu
+        (log(0.9848843, 2), log(0.0000735, 2), log(0.0000334, 2),
+         log(0.0150068, 2), log(0.0000017, 2), log(0.0000003, 2)),  # M_
+        (log(0.5196194, 2), log(0.4647955, 2), log(0.0059060, 2),
+         log(0.0096792, 2)),  # Ir_
+        (log(0.7611255, 2), log(0.2294619, 2), log(0.0072673, 2),
+         log(0.0021453, 2)),  # Ig_
+        (log(0.0799009, 2), log(0.0000262, 2), log(0.0001836, 2),
+         log(0.9161349, 2), log(0.0033370, 2), log(0.0004173, 2)),  # Mu_
+        (log(0.1434529, 2), log(0.0036995, 2), log(0.2642928, 2),
+         log(0.5885548, 2)),  # Iru_
+        (log(0.1384551, 2), log(0.0431328, 2), log(0.6362921, 2),
+         log(0.1821200, 2))  # Igu_
+    )
+
+    defaultScoringMatrix = [
+        log(0.955, 2), log(0.04, 2), log(0.004, 2), log(0.001, 2)]
+
+    def __new__(cls, counting_table, trusted_cov_cutoff, bits_theta,
+                **kwargs):
+
+        if 'filename' in kwargs:
+            with open(kwargs.pop('filename')) as paramfile:
+                params = json.load(paramfile)
+            scoring_matrix = params['scoring_matrix']
+            transition_probabilities = params['transition_probabilities']
+        else:
+            if 'scoring_matrix' in kwargs:
+                scoring_matrix = kwargs.pop('scoring_matrix')
+            else:
+                scoring_matrix = ReadAligner.defaultScoringMatrix
+            if 'transition_probabilities' in kwargs:
+                transition_probabilities = kwargs.pop(
+                    'transition_probabilities')
+            else:
+                transition_probabilities = \
+                    ReadAligner.defaultTransitionProbabilities
+        r = _ReadAligner.__new__(cls, counting_table, trusted_cov_cutoff,
+                                 bits_theta, scoring_matrix,
+                                 transition_probabilities)
+        r.graph = counting_table
+        return r
+
+    def __init__(self, *args, **kwargs):
+        """
+        ReadAligner initialization.
+
+        HMM state notation abbreviations:
+        M_t - trusted match; M_u - untrusted match
+        Ir_t - trusted read insert; Ir_u - untrusted read insert
+        Ig_t - trusted graph insert; Ig_u - untrusted graph insert
+
+        Keyword arguments:
+        filename - a path to a JSON encoded file providing the scoring matrix
+            for the HMM in an entry named 'scoring_matrix' and the transition
+            probabilities for the HMM in an entry named
+            'transition_probabilities'. If provided the remaining keyword
+            arguments are ignored. (default: None)
+        scoring_matrix - a list of floats: trusted match, trusted mismatch,
+            untrusted match, untrusted mismatch. (default:
+                ReadAligner.defaultScoringMatrix)
+        transition_probabilities - A sparse matrix as a tuple of six tuples.
+            The inner tuples contain 6, 4, 4, 6, 4, and 4 floats respectively.
+            Transitions are notated as 'StartState-NextState':
+            (
+              ( M_t-M_t,  M_t-Ir_t,  M_t-Ig_t,  M_t-M_u,  M_t-Ir_u,  M_t-Ig_u),
+              (Ir_t-M_t, Ir_t-Ir_t,            Ir_t-M_u, Ir_t-Ir_u           ),
+              (Ig_t-M_t,          , Ig_t-Ig_t, Ig_t-M_u,            Ig_t-Ig_u),
+              ( M_u-M_t,  M_u-Ir_t,  M_u-Ig_t,  M_u-M_u,  M_u-Ir_u,  M_u-Ig_u),
+              (Ir_u-M_t, Ir_u-Ir_t,            Ir_u-M_u, Ir_u-Ir_u           ),
+              (Ig_u-M_t,          , Ig_u-Ig_t, Ig_u-M_u,            Ig_u-Ig_u)
+            )
+            (default: ReadAligner.defaultTransitionProbabilities)
+
+
+        Note: the underlying CPython implementation creates the ReadAligner
+        during the __new__ process and so the class initialization actually
+        occurs there. Instantiation is documented here in __init__ as this is
+        the traditional way.
+        """
+        _ReadAligner.__init__(self)
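
A minimal usage sketch of the refactored aligner, based on the
docstrings above. Parameter values, table sizes, and the example
sequence are invented for illustration; the JSON parameter file is the
test-data file added by this commit:

    import khmer

    ct = khmer.CountingHash(10, 1048576, 2)   # k=10; sizes arbitrary
    ct.consume('ACCTAGGTTCGACATGTACC' * 5)    # hypothetical target DNA
    aligner = khmer.ReadAligner(
        ct, 1, 1.0, filename='tests/test-data/readaligner-default.json')
    # align() is the main entry point per the class docstring; in the
    # accompanying tests it returns a score, the graph and read
    # alignments, and a truncation flag.
    score, graph_alignment, read_alignment, truncated = \
        aligner.align('ACCTAGGTTCGACATGTACC')
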
diff --git a/khmer/_khmer.cc b/khmer/_khmer.cc
index d58e832..d41d438 100644
--- a/khmer/_khmer.cc
+++ b/khmer/_khmer.cc
@@ -75,57 +75,6 @@ extern "C" {
     MOD_INIT(_khmer);
 }
 
-// Configure module logging.
-//#define WITH_INTERNAL_TRACING
-namespace khmer
-{
-
-namespace python
-{
-
-#ifdef WITH_INTERNAL_TRACING
-#warning "Internal tracing of Python extension module is enabled."
-static uint8_t const    _MODULE_TRACE_LEVEL = TraceLogger:: TLVL_DEBUG9;
-static void     _trace_logger(
-    uint8_t level, char const * format, ...
-)
-{
-    static FILE *   _stream_handle  = NULL;
-
-    if (NULL == _stream_handle) {
-        _stream_handle = fopen( "pymod.log", "w" );
-    }
-
-    va_list varargs;
-
-    if (_MODULE_TRACE_LEVEL <= level) {
-        va_start( varargs, format );
-        vfprintf( _stream_handle, format, varargs );
-        va_end( varargs );
-        fflush( _stream_handle );
-    }
-
-}
-#endif
-
-
-} // namespace python
-
-} // namespace khmer
-
-
-class _khmer_exception
-{
-private:
-    std::string _message;
-public:
-    _khmer_exception(std::string message) : _message(message) { };
-    inline const std::string get_message() const
-    {
-        return _message;
-    };
-};
-
 /***********************************************************************/
 
 //
@@ -218,7 +167,7 @@ static PyGetSetDef khmer_Read_accessors [ ] = {
 
 static PyTypeObject khmer_Read_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)        /* init & ob_size */
-    "_khmer.Read",                         /* tp_name */
+    "_khmer.Read",                        /* tp_name */
     sizeof(khmer_Read_Object),            /* tp_basicsize */
     0,                                    /* tp_itemsize */
     (destructor)khmer_Read_dealloc,       /* tp_dealloc */
@@ -314,8 +263,8 @@ _ReadParser_new( PyTypeObject * subtype, PyObject * args, PyObject * kwds )
     try {
         myself->parser =
             IParser:: get_parser( ifile_name );
-    } catch (InvalidStreamHandle &exc) {
-        PyErr_SetString( PyExc_ValueError, exc.what() );
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString( PyExc_OSError, exc.what() );
         return NULL;
     }
     return self;
@@ -329,12 +278,13 @@ _ReadParser_iternext( PyObject * self )
     khmer_ReadParser_Object * myself  = (khmer_ReadParser_Object *)self;
     IParser *       parser  = myself->parser;
 
-    bool    stop_iteration = false;
-    char    const * exc = NULL;
-    Read *  the_read_PTR;
+    bool        stop_iteration  = false;
+    const char *value_exception = NULL;
+    const char *file_exception  = NULL;
+    Read       *the_read_PTR    = NULL;
     try {
         the_read_PTR = new Read( );
-    } catch (std::bad_alloc &e) {
+    } catch (std::bad_alloc &exc) {
         return PyErr_NoMemory();
     }
 
@@ -343,13 +293,13 @@ _ReadParser_iternext( PyObject * self )
     if (!stop_iteration) {
         try {
             parser->imprint_next_read( *the_read_PTR );
-        } catch (NoMoreReadsAvailable &e) {
+        } catch (NoMoreReadsAvailable &exc) {
             stop_iteration = true;
-        } catch (StreamReadError &e) {
-            exc = e.what();
-        } catch (InvalidRead &e) {
-            exc = e.what();
-	}
+        } catch (khmer_file_exception &exc) {
+            file_exception = exc.what();
+        } catch (khmer_value_exception &exc) {
+            value_exception = exc.what();
+        }
     }
     Py_END_ALLOW_THREADS
 
@@ -360,9 +310,14 @@ _ReadParser_iternext( PyObject * self )
         return NULL;
     }
 
-    if (exc != NULL) {
+    if (file_exception != NULL) {
         delete the_read_PTR;
-        PyErr_SetString(PyExc_IOError, exc);
+        PyErr_SetString(PyExc_OSError, file_exception);
+        return NULL;
+    }
+    if (value_exception != NULL) {
+        delete the_read_PTR;
+        PyErr_SetString(PyExc_ValueError, value_exception);
         return NULL;
     }
 
@@ -377,42 +332,39 @@ PyObject *
 _ReadPairIterator_iternext(khmer_ReadPairIterator_Object * myself)
 {
     khmer_ReadParser_Object * parent = (khmer_ReadParser_Object*)myself->parent;
-    IParser *           parser    = parent->parser;
-    uint8_t         pair_mode = myself->pair_mode;
+    IParser    *parser    = parent->parser;
+    uint8_t     pair_mode = myself->pair_mode;
 
     ReadPair    the_read_pair;
-    bool    stop_iteration      = false;
-    const char * value_error_what = NULL;
-    const char * io_error_what = NULL;
+    bool        stop_iteration  = false;
+    const char *value_exception = NULL;
+    const char *file_exception  = NULL;
 
     Py_BEGIN_ALLOW_THREADS
     stop_iteration = parser->is_complete( );
-    if (!stop_iteration)
+    if (!stop_iteration) {
         try {
             parser->imprint_next_read_pair( the_read_pair, pair_mode );
-        } catch (UnknownPairReadingMode &exc) {
-            value_error_what = exc.what();
-        } catch (InvalidRead &exc) {
-            io_error_what = exc.what();
-        } catch (InvalidReadPair &exc) {
-            io_error_what = exc.what();
-        } catch (StreamReadError &exc) {
-            io_error_what = "Input file error.";
         } catch (NoMoreReadsAvailable &exc) {
             stop_iteration = true;
+        } catch (khmer_file_exception &exc) {
+            file_exception = exc.what();
+        } catch (khmer_value_exception &exc) {
+            value_exception = exc.what();
         }
+    }
     Py_END_ALLOW_THREADS
 
     // Note: Can return NULL instead of setting the StopIteration exception.
     if (stop_iteration) {
         return NULL;
     }
-    if (value_error_what != NULL) {
-        PyErr_SetString(PyExc_ValueError, value_error_what);
+    if (file_exception != NULL) {
+        PyErr_SetString(PyExc_OSError, file_exception);
         return NULL;
     }
-    if (io_error_what != NULL) {
-        PyErr_SetString( PyExc_IOError, io_error_what);
+    if (value_exception != NULL) {
+        PyErr_SetString(PyExc_ValueError, value_exception);
         return NULL;
     }
 
@@ -534,7 +486,9 @@ static PyGetSetDef khmer_ReadParser_accessors[] = {
     {NULL, NULL, NULL, NULL, NULL} /* Sentinel */
 };
 
-static PyTypeObject khmer_ReadParser_Type = {
+static PyTypeObject khmer_ReadParser_Type
+CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_ReadParser_Object")
+= {
     PyVarObject_HEAD_INIT(NULL, 0)             /* init & ob_size */
     "_khmer.ReadParser",                        /* tp_name */
     sizeof(khmer_ReadParser_Object),           /* tp_basicsize */
@@ -587,6 +541,10 @@ void _init_ReadParser_Type_constants()
     int result;
 
     PyObject * value = PyLong_FromLong( IParser:: PAIR_MODE_ALLOW_UNPAIRED );
+    if (value == NULL) {
+        Py_DECREF(cls_attrs_DICT);
+        return;
+    }
     result = PyDict_SetItemString(cls_attrs_DICT,
                                   "PAIR_MODE_ALLOW_UNPAIRED", value);
     Py_XDECREF(value);
@@ -596,6 +554,10 @@ void _init_ReadParser_Type_constants()
     }
 
     value = PyLong_FromLong( IParser:: PAIR_MODE_IGNORE_UNPAIRED );
+    if (value == NULL) {
+        Py_DECREF(cls_attrs_DICT);
+        return;
+    }
     result = PyDict_SetItemString(cls_attrs_DICT,
                                   "PAIR_MODE_IGNORE_UNPAIRED", value );
     Py_XDECREF(value);
@@ -605,6 +567,10 @@ void _init_ReadParser_Type_constants()
     }
 
     value = PyLong_FromLong( IParser:: PAIR_MODE_ERROR_ON_UNPAIRED );
+    if (value == NULL) {
+        Py_DECREF(cls_attrs_DICT);
+        return;
+    }
     result = PyDict_SetItemString(cls_attrs_DICT,
                                   "PAIR_MODE_ERROR_ON_UNPAIRED", value);
     Py_XDECREF(value);
@@ -856,8 +822,11 @@ hashtable_consume_fasta(khmer_KHashtable_Object * me, PyObject * args)
     unsigned int          total_reads   = 0;
     try {
         hashtable->consume_fasta(filename, total_reads, n_consumed);
-    } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
         return NULL;
     }
 
@@ -881,12 +850,30 @@ hashtable_consume_fasta_with_reads_parser(khmer_KHashtable_Object * me,
         _PyObject_to_khmer_ReadParser( rparser_obj );
 
     // call the C++ function, and trap signals => Python
-    unsigned long long  n_consumed  = 0;
-    unsigned int    total_reads = 0;
+    unsigned long long  n_consumed      = 0;
+    unsigned int        total_reads     = 0;
+    const char         *value_exception = NULL;
+    const char         *file_exception  = NULL;
+
     Py_BEGIN_ALLOW_THREADS
-    hashtable->consume_fasta(rparser, total_reads, n_consumed);
+    try {
+        hashtable->consume_fasta(rparser, total_reads, n_consumed);
+    } catch (khmer_file_exception &exc) {
+        file_exception = exc.what();
+    } catch (khmer_value_exception &exc) {
+        value_exception = exc.what();
+    }
     Py_END_ALLOW_THREADS
 
+    if (file_exception != NULL) {
+        PyErr_SetString(PyExc_OSError, file_exception);
+        return NULL;
+    }
+    if (value_exception != NULL) {
+        PyErr_SetString(PyExc_ValueError, value_exception);
+        return NULL;
+    }
+
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
 
@@ -974,7 +961,7 @@ hashtable_load(khmer_KHashtable_Object * me, PyObject * args)
     try {
         hashtable->load(filename);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -996,7 +983,7 @@ hashtable_save(khmer_KHashtable_Object * me, PyObject * args)
     try {
         hashtable->save(filename);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -1146,7 +1133,15 @@ hashtable_consume_fasta_and_tag(khmer_KHashtable_Object * me, PyObject * args)
     unsigned long long n_consumed;
     unsigned int total_reads;
 
-    hashtable->consume_fasta_and_tag(filename, total_reads, n_consumed);
+    try {
+        hashtable->consume_fasta_and_tag(filename, total_reads, n_consumed);
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
+        return NULL;
+    }
 
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
@@ -1272,7 +1267,7 @@ hashtable_load_stop_tags(khmer_KHashtable_Object * me, PyObject * args)
     try {
         hashtable->load_stop_tags(filename, clear_tags);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -1295,7 +1290,7 @@ hashtable_save_stop_tags(khmer_KHashtable_Object * me, PyObject * args)
     try {
         hashtable->save_stop_tags(filename);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -1450,11 +1445,18 @@ hashtable_do_subset_partition(khmer_KHashtable_Object * me, PyObject * args)
 
     khmer_KSubsetPartition_Object * subset_obj = (khmer_KSubsetPartition_Object *)\
             PyObject_New(khmer_KSubsetPartition_Object, &khmer_KSubsetPartition_Type);
+
+    if (subset_obj == NULL) {
+        delete subset_p;
+        return NULL;
+    }
+
     subset_obj->subset = subset_p;
 
-    return (PyObject *)subset_obj;
+    return (PyObject *) subset_obj;
 }
 
+
 static
 PyObject *
 hashtable_join_partitions_by_path(khmer_KHashtable_Object * me, PyObject * args)
@@ -1477,12 +1479,13 @@ hashtable_merge_subset(khmer_KHashtable_Object * me, PyObject * args)
 {
     Hashtable * hashtable = me->hashtable;
 
-    khmer_KSubsetPartition_Object * subset_obj;
-    if (!PyArg_ParseTuple(args, "O!", &khmer_KSubsetPartition_Type, &subset_obj)) {
+    khmer_KSubsetPartition_Object * subset_obj = NULL;
+    if (!PyArg_ParseTuple(args, "O!", &khmer_KSubsetPartition_Type,
+                          &subset_obj)) {
         return NULL;
     }
-    SubsetPartition * subset_p;
-    subset_p = subset_obj->subset;
+
+    SubsetPartition * subset_p = subset_obj->subset;
 
     hashtable->partition->merge(subset_p);
 
@@ -1503,7 +1506,7 @@ hashtable_merge_from_disk(khmer_KHashtable_Object * me, PyObject * args)
     try {
         hashtable->partition->merge_from_disk(filename);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -1527,22 +1530,28 @@ hashtable_consume_fasta_and_tag_with_reads_parser(khmer_KHashtable_Object * me,
     read_parsers:: IParser * rparser = rparser_obj-> parser;
 
     // call the C++ function, and trap signals => Python
-    unsigned long long  n_consumed  = 0;
-    unsigned int          total_reads = 0;
-    char const * exc = NULL;
+    const char         *value_exception = NULL;
+    const char         *file_exception  = NULL;
+    unsigned long long  n_consumed      = 0;
+    unsigned int        total_reads     = 0;
+
     Py_BEGIN_ALLOW_THREADS
     try {
-        hashtable->consume_fasta_and_tag(
-            rparser, total_reads, n_consumed
-        );
-    } catch (khmer::read_parsers::NoMoreReadsAvailable &e) {
-        exc = e.what();
+        hashtable->consume_fasta_and_tag(rparser, total_reads, n_consumed);
+    } catch (khmer_file_exception &exc) {
+        file_exception = exc.what();
+    } catch (khmer_value_exception &exc) {
+        value_exception = exc.what();
     }
     Py_END_ALLOW_THREADS
-    if (exc != NULL) {
-        PyErr_SetString(PyExc_IOError, exc);
+
+    if (file_exception != NULL) {
+        PyErr_SetString(PyExc_OSError, file_exception);
         return NULL;
     }
+    if (value_exception != NULL) {
+        PyErr_SetString(PyExc_ValueError, value_exception);
+        return NULL;
+    }
 
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
@@ -1567,14 +1576,16 @@ hashtable_consume_fasta_and_tag_with_stoptags(khmer_KHashtable_Object * me,
 
     // call the C++ function, and trap signals => Python
 
-    unsigned long long n_consumed;
-    unsigned int total_reads;
-
+    unsigned long long  n_consumed;
+    unsigned int        total_reads;
     try {
         hashtable->consume_fasta_and_tag_with_stoptags(filename,
                 total_reads, n_consumed);
-    } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
         return NULL;
     }
 
@@ -1601,8 +1612,11 @@ hashtable_consume_partitioned_fasta(khmer_KHashtable_Object * me,
 
     try {
         hashtable->consume_partitioned_fasta(filename, total_reads, n_consumed);
-    } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
         return NULL;
     }
 
@@ -1724,7 +1738,7 @@ hashtable_get_stop_tags(khmer_KHashtable_Object * me, PyObject * args)
     PyObject * x = PyList_New(hashtable->stop_tags.size());
     unsigned long long i = 0;
     for (si = hashtable->stop_tags.begin(); si != hashtable->stop_tags.end();
-            si++) {
+            ++si) {
         std::string s = _revhash(*si, k);
         PyList_SET_ITEM(x, i, Py_BuildValue("s", s.c_str()));
         i++;
@@ -1748,7 +1762,8 @@ hashtable_get_tagset(khmer_KHashtable_Object * me, PyObject * args)
 
     PyObject * x = PyList_New(hashtable->all_tags.size());
     unsigned long long i = 0;
-    for (si = hashtable->all_tags.begin(); si != hashtable->all_tags.end(); si++) {
+    for (si = hashtable->all_tags.begin(); si != hashtable->all_tags.end();
+	    ++si) {
         std::string s = _revhash(*si, k);
         PyList_SET_ITEM(x, i, Py_BuildValue("s", s.c_str()));
         i++;
@@ -1785,7 +1800,10 @@ hashtable_output_partitions(khmer_KHashtable_Object * me, PyObject * args)
                        output,
                        output_unassigned);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
         return NULL;
     }
 
@@ -1811,9 +1829,17 @@ hashtable_find_unpart(khmer_KHashtable_Object * me, PyObject * args)
     bool stop_big_traversals = PyObject_IsTrue(stop_big_traversals_o);
     unsigned int n_singletons = 0;
 
-    SubsetPartition * subset_p = hashtable->partition;
-    n_singletons = subset_p->find_unpart(filename, traverse,
-                                         stop_big_traversals);
+    try {
+        SubsetPartition * subset_p = hashtable->partition;
+        n_singletons = subset_p->find_unpart(filename, traverse,
+                                             stop_big_traversals);
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
+        return NULL;
+    }
 
     return PyLong_FromLong(n_singletons);
 }
@@ -1831,7 +1857,15 @@ hashtable_filter_if_present(khmer_KHashtable_Object * me, PyObject * args)
         return NULL;
     }
 
-    hashtable->filter_if_present(filename, output);
+    try {
+        hashtable->filter_if_present(filename, output);
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
@@ -1851,7 +1885,7 @@ hashtable_save_partitionmap(khmer_KHashtable_Object * me, PyObject * args)
     try {
         hashtable->partition->save_partitionmap(filename);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -1873,7 +1907,7 @@ hashtable_load_partitionmap(khmer_KHashtable_Object * me, PyObject * args)
     try {
         hashtable->partition->load_partitionmap(filename);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -1918,10 +1952,12 @@ hashtable_subset_count_partitions(khmer_KHashtable_Object * me, PyObject * args)
 {
     khmer_KSubsetPartition_Object * subset_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "O!", &khmer_KSubsetPartition_Type, &subset_obj)) {
+    if (!PyArg_ParseTuple(args, "O!", &khmer_KSubsetPartition_Type,
+                          &subset_obj)) {
         return NULL;
     }
 
     size_t n_partitions = 0, n_unassigned = 0;
     subset_obj->subset->count_partitions(n_partitions, n_unassigned);
 
@@ -1935,12 +1971,12 @@ hashtable_subset_partition_size_distribution(khmer_KHashtable_Object * me,
         PyObject * args)
 {
     khmer_KSubsetPartition_Object * subset_obj = NULL;
-    if (!PyArg_ParseTuple(args, "O!", &khmer_KSubsetPartition_Type, &subset_obj)) {
+    if (!PyArg_ParseTuple(args, "O!", &khmer_KSubsetPartition_Type,
+                          &subset_obj)) {
         return NULL;
     }
 
-    SubsetPartition * subset_p;
-    subset_p = subset_obj->subset;
+    SubsetPartition * subset_p = subset_obj->subset;
 
     PartitionCountDistribution d;
 
@@ -1954,7 +1990,7 @@ hashtable_subset_partition_size_distribution(khmer_KHashtable_Object * me,
     PartitionCountDistribution::iterator di;
 
     unsigned int i;
-    for (i = 0, di = d.begin(); di != d.end(); di++, i++) {
+    for (i = 0, di = d.begin(); di != d.end(); ++di, i++) {
         PyObject * value =  Py_BuildValue("KK", di->first, di->second);
         if (value == NULL) {
             Py_DECREF(x);
@@ -1995,7 +2031,7 @@ hashtable_load_tagset(khmer_KHashtable_Object * me, PyObject * args)
     try {
         hashtable->load_tagset(filename, clear_tags);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -2017,7 +2053,7 @@ hashtable_save_tagset(khmer_KHashtable_Object * me, PyObject * args)
     try {
         hashtable->save_tagset(filename);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -2032,19 +2068,19 @@ hashtable_save_subset_partitionmap(khmer_KHashtable_Object * me,
     const char * filename = NULL;
     khmer_KSubsetPartition_Object * subset_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "O!s", &khmer_KSubsetPartition_Type, &subset_obj, &filename)) {
+    if (!PyArg_ParseTuple(args, "O!s", &khmer_KSubsetPartition_Type,
+                          &subset_obj, &filename)) {
         return NULL;
     }
 
-    SubsetPartition * subset_p;
-    subset_p = subset_obj->subset;
+    SubsetPartition * subset_p = subset_obj->subset;
 
     Py_BEGIN_ALLOW_THREADS
 
     try {
         subset_p->save_partitionmap(filename);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -2073,30 +2109,33 @@ hashtable_load_subset_partitionmap(khmer_KHashtable_Object * me,
         return PyErr_NoMemory();
     }
 
-    bool fail = false;
-    std::string err;
+    const char         *file_exception  = NULL;
 
     Py_BEGIN_ALLOW_THREADS
-
     try {
         subset_p->load_partitionmap(filename);
-    } catch (khmer_file_exception &e) {
-        fail = true;
-        err = e.what();
+    } catch (khmer_file_exception &exc) {
+        file_exception = exc.what();
     }
-
     Py_END_ALLOW_THREADS
 
-    if (fail) {
-        PyErr_SetString(PyExc_IOError, err.c_str());
+    if (file_exception != NULL) {
+        PyErr_SetString(PyExc_OSError, file_exception);
         delete subset_p;
         return NULL;
-    } else {
-        khmer_KSubsetPartition_Object * subset_obj = (khmer_KSubsetPartition_Object *)\
-           PyObject_New(khmer_KSubsetPartition_Object, &khmer_KSubsetPartition_Type);
-        subset_obj->subset = subset_p;
-        return (PyObject*) subset_obj;
     }
+
+    khmer_KSubsetPartition_Object * subset_obj = (khmer_KSubsetPartition_Object *)\
+            PyObject_New(khmer_KSubsetPartition_Object, &khmer_KSubsetPartition_Type);
+
+    if (subset_obj == NULL) {
+        delete subset_p;
+        return NULL;
+    }
+
+    subset_obj->subset = subset_p;
+
+    return (PyObject *) subset_obj;
 }
 
 static
@@ -2137,11 +2176,14 @@ hashtable__validate_subset_partitionmap(khmer_KHashtable_Object * me,
 {
     khmer_KSubsetPartition_Object * subset_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "O!", &khmer_KSubsetPartition_Type, &subset_obj)) {
+    if (!PyArg_ParseTuple(args, "O!", &khmer_KSubsetPartition_Type,
+                          &subset_obj)) {
         return NULL;
     }
 
-    subset_obj->subset->_validate_pmap();
+    SubsetPartition * subset_p = subset_obj->subset;
+
+    subset_p->_validate_pmap();
 
     Py_RETURN_NONE;
 }
@@ -2243,7 +2285,7 @@ hashtable_divide_tags_into_subsets(khmer_KHashtable_Object * me,
     PyObject * x = PyList_New(divvy.size());
     unsigned int i = 0;
     for (SeenSet::const_iterator si = divvy.begin(); si != divvy.end();
-            si++, i++) {
+            ++si, i++) {
         PyList_SET_ITEM(x, i, PyLong_FromUnsignedLongLong(*si));
     }
 
@@ -2681,7 +2723,7 @@ count_find_spectral_error_positions(khmer_KCountingHash_Object * me,
 {
     khmer::CountingHash * counting = me->counting;
 
-    char * seq = NULL;
+    const char * seq = NULL;
     khmer::BoundedCounterType max_count = 0; // unsigned short int
 
     if (!PyArg_ParseTuple(args, "sH", &seq, &max_count)) {
@@ -2724,8 +2766,16 @@ count_fasta_dump_kmers_by_abundance(khmer_KCountingHash_Object * me,
         return NULL;
     }
 
-    counting->fasta_dump_kmers_by_abundance(inputfile,
-                                            limit_by);
+    try {
+        counting->fasta_dump_kmers_by_abundance(inputfile,
+                                                limit_by);
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
@@ -2914,8 +2964,17 @@ count_fasta_count_kmers_by_position(khmer_KCountingHash_Object * me,
     max_read_len = (unsigned int) max_read_len_long;
 
     unsigned long long * counts;
-    counts = counting->fasta_count_kmers_by_position(inputfile, max_read_len,
-             (unsigned short) limit_by_count_int);
+    try {
+        counts = counting->fasta_count_kmers_by_position(inputfile,
+                                                         max_read_len,
+                                        (unsigned short) limit_by_count_int);
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
+        return NULL;
+    }
 
     PyObject * x = PyList_New(max_read_len);
     if (x == NULL) {
@@ -2951,15 +3010,31 @@ count_abundance_distribution_with_reads_parser(khmer_KCountingHash_Object * me,
         return NULL;
     }
 
-    read_parsers:: IParser * rparser = rparser_obj->parser;
-    Hashbits * hashbits = tracking_obj->hashbits;
-
-    HashIntoType * dist = NULL;
+    read_parsers::IParser *rparser      = rparser_obj->parser;
+    Hashbits           *hashbits        = tracking_obj->hashbits;
+    HashIntoType       *dist            = NULL;
+    const char         *value_exception = NULL;
+    const char         *file_exception  = NULL;
 
     Py_BEGIN_ALLOW_THREADS
-    dist = counting->abundance_distribution(rparser, hashbits);
+    try {
+        dist = counting->abundance_distribution(rparser, hashbits);
+    } catch (khmer_file_exception &exc) {
+        file_exception = exc.what();
+    } catch (khmer_value_exception &exc) {
+        value_exception = exc.what();
+    }
     Py_END_ALLOW_THREADS
 
+    if (file_exception != NULL) {
+        PyErr_SetString(PyExc_OSError, file_exception);
+        return NULL;
+    }
+    if (value_exception != NULL) {
+        PyErr_SetString(PyExc_ValueError, value_exception);
+        return NULL;
+    }
+
     PyObject * x = PyList_New(MAX_BIGCOUNT + 1);
     if (x == NULL) {
         delete[] dist;
@@ -2986,35 +3061,49 @@ count_abundance_distribution(khmer_KCountingHash_Object * me, PyObject * args)
         return NULL;
     }
 
-    Hashbits * hashbits = tracking_obj->hashbits;
-    HashIntoType * dist;
-
-    char const * result = "";
-    bool exception = false;
+    Hashbits           *hashbits        = tracking_obj->hashbits;
+    HashIntoType       *dist            = NULL;
+    const char         *value_exception = NULL;
+    const char         *file_exception  = NULL;
     Py_BEGIN_ALLOW_THREADS
     try {
         dist = counting->abundance_distribution(filename, hashbits);
-    } catch (khmer_file_exception &e) {
-        exception = true;
-        result = e.what();
+    } catch (khmer_file_exception &exc) {
+        file_exception = exc.what();
+    } catch (khmer_value_exception &exc) {
+        value_exception = exc.what();
     }
     Py_END_ALLOW_THREADS
 
-    if (exception) {
-        PyErr_SetString(PyExc_IOError, result);
+    if (file_exception != NULL) {
+        PyErr_SetString(PyExc_OSError, file_exception);
+        if (dist != NULL) {
+            delete []dist;
+        }
+        return NULL;
+    }
+    if (value_exception != NULL) {
+        PyErr_SetString(PyExc_ValueError, value_exception);
+        if (dist != NULL) {
+            delete []dist;
+        }
         return NULL;
     }
 
     PyObject * x = PyList_New(MAX_BIGCOUNT + 1);
     if (x == NULL) {
-        delete[] dist;
+        if (dist != NULL) {
+            delete []dist;
+        }
         return NULL;
     }
     for (int i = 0; i < MAX_BIGCOUNT + 1; i++) {
         PyList_SET_ITEM(x, i, PyLong_FromUnsignedLongLong(dist[i]));
     }
 
-    delete[] dist;
+    if (dist != NULL) {
+        delete []dist;
+    }
 
     return x;
 }
@@ -3223,7 +3312,7 @@ static PyObject* _new_counting_hash(PyTypeObject * type, PyObject * args,
             Py_DECREF(self);
             return PyErr_NoMemory();
         }
-        self->khashtable.hashtable = (Hashtable *) self->counting;
+        self->khashtable.hashtable = dynamic_cast<Hashtable*>(self->counting);
     }
 
     return (PyObject *) self;
@@ -3247,14 +3336,18 @@ hashbits_count_overlap(khmer_KHashbits_Object * me, PyObject * args)
 
 // call the C++ function, and trap signals => Python
 
-    unsigned long long n_consumed;
-    unsigned int total_reads;
-    HashIntoType curve[2][100];
+    HashIntoType        curve[2][100];
 
     try {
-        hashbits->consume_fasta_overlap(filename, curve, *ht2, total_reads, n_consumed);
-    } catch (InvalidStreamHandle &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        unsigned long long  n_consumed;
+        unsigned int        total_reads;
+        hashbits->consume_fasta_overlap(filename, curve, *ht2, total_reads,
+                                        n_consumed);
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
         return NULL;
     }
 
@@ -3438,7 +3531,7 @@ subset_partition_size_distribution(khmer_KSubsetPartition_Object * me,
     PartitionCountDistribution::iterator di;
 
     unsigned int i;
-    for (i = 0, di = d.begin(); di != d.end(); di++, i++) {
+    for (i = 0, di = d.begin(); di != d.end(); ++di, i++) {
         PyObject * tup = Py_BuildValue("KK", di->first, di->second);
         if (tup != NULL) {
             PyList_SET_ITEM(x, i, tup);
@@ -3472,7 +3565,7 @@ subset_partition_sizes(khmer_KSubsetPartition_Object * me, PyObject * args)
 
     unsigned int i = 0;
     PartitionCountMap::const_iterator mi;
-    for (mi = cm.begin(); mi != cm.end(); mi++) {
+    for (mi = cm.begin(); mi != cm.end(); ++mi) {
         if (mi->second >= min_size) {
             i++;
         }
@@ -3484,7 +3577,7 @@ subset_partition_sizes(khmer_KSubsetPartition_Object * me, PyObject * args)
     }
 
     // this should probably be a dict. @CTB
-    for (i = 0, mi = cm.begin(); mi != cm.end(); mi++) {
+    for (i = 0, mi = cm.begin(); mi != cm.end(); ++mi) {
         if (mi->second >= min_size) {
             PyObject * tup = Py_BuildValue("II", mi->first, mi->second);
             if (tup != NULL) {
@@ -3525,7 +3618,7 @@ subset_partition_average_coverages(khmer_KSubsetPartition_Object * me,
     }
 
     // this should probably be a dict. @CTB
-    for (i = 0, mi = cm.begin(); mi != cm.end(); mi++, i++) {
+    for (i = 0, mi = cm.begin(); mi != cm.end(); ++mi, i++) {
         PyObject * tup = Py_BuildValue("II", mi->first, mi->second);
         if (tup != NULL) {
             PyList_SET_ITEM(x, i, tup);
@@ -3673,32 +3766,37 @@ labelhash_consume_fasta_and_tag_with_labels(khmer_KLabelHash_Object * me,
 {
     LabelHash * hb = me->labelhash;
 
-    std::ofstream outfile;
-
     const char * filename;
 
     if (!PyArg_ParseTuple(args, "s", &filename)) {
         return NULL;
     }
 
-    unsigned long long n_consumed;
-    unsigned int total_reads;
-    char const * exc = NULL;
+    const char         *value_exception = NULL;
+    const char         *file_exception  = NULL;
+    unsigned long long  n_consumed      = 0;
+    unsigned int        total_reads     = 0;
     //Py_BEGIN_ALLOW_THREADS
     try {
         hb->consume_fasta_and_tag_with_labels(filename, total_reads,
                                               n_consumed);
-    } catch (khmer_file_exception &e) {
-        exc = e.what();
+    } catch (khmer_file_exception &exc) {
+        file_exception = exc.what();
+    } catch (khmer_value_exception &exc) {
+        value_exception = exc.what();
     }
     //Py_END_ALLOW_THREADS
-    if (exc != NULL) {
-        PyErr_SetString(PyExc_IOError, exc);
+
+    if (file_exception != NULL) {
+        PyErr_SetString(PyExc_OSError, file_exception);
+        return NULL;
+    }
+    if (value_exception != NULL) {
+        PyErr_SetString(PyExc_ValueError, value_exception);
         return NULL;
     }
 
     return Py_BuildValue("IK", total_reads, n_consumed);
-
 }
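
A hedged usage sketch for the labeled variant, assuming the type is exposed
as khmer.LabelHash; constructor arguments and file name are illustrative:

    import khmer

    lh = khmer.LabelHash(20, 1e6, 4)
    n_reads, n_kmers = lh.consume_fasta_and_tag_with_labels('reads.fa')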
 
 static
@@ -3716,16 +3814,20 @@ labelhash_consume_partitioned_fasta_and_tag_with_labels(
 
     // call the C++ function, and trap signals => Python
 
-    unsigned long long n_consumed;
-    unsigned int total_reads;
+    unsigned long long  n_consumed  = 0;
+    unsigned int        total_reads = 0;
 
     try {
         labelhash->consume_partitioned_fasta_and_tag_with_labels(filename,
                 total_reads, n_consumed);
-    } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
         return NULL;
     }
+
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
 
@@ -3742,6 +3844,7 @@ labelhash_consume_sequence_and_tag_with_labels(khmer_KLabelHash_Object * me,
     }
     unsigned long long n_consumed = 0;
     Label * the_label = hb->check_and_allocate_label(c);
+
     hb->consume_sequence_and_tag_with_labels(seq, n_consumed, *the_label);
     return Py_BuildValue("K", n_consumed);
 }
@@ -3929,7 +4032,7 @@ labelhash_save_labels_and_tags(khmer_KLabelHash_Object * me, PyObject * args)
     try {
         labelhash->save_labels_and_tags(filename);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -3950,7 +4053,7 @@ labelhash_load_labels_and_tags(khmer_KLabelHash_Object * me, PyObject * args)
     try {
         labelhash->load_labels_and_tags(filename);
     } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+        PyErr_SetString(PyExc_OSError, e.what());
         return NULL;
     }
 
@@ -4042,12 +4145,14 @@ hashtable_repartition_largest_partition(khmer_KHashtable_Object * me,
     SubsetPartition * subset_p;
     unsigned int distance, threshold, frequency;
 
-    if (!PyArg_ParseTuple(args, "OO!III", &subset_o, &khmer_KCountingHash_Type,
-        &counting_o, &distance, &threshold, &frequency)) {
+    if (!PyArg_ParseTuple(args, "OO!III",
+                          &subset_o,
+                          &khmer_KCountingHash_Type, &counting_o,
+                          &distance, &threshold, &frequency)) {
         return NULL;
     }
 
-    if (subset_o != Py_None) {
+    if (PyObject_TypeCheck(subset_o, &khmer_KSubsetPartition_Type)) {
         subset_p = ((khmer_KSubsetPartition_Object *) subset_o)->subset;
     } else {
         subset_p = hashtable->partition;
@@ -4093,8 +4198,110 @@ static PyObject * readaligner_align(khmer_ReadAligner_Object * me,
     return ret;
 }
 
+static PyObject * readaligner_align_forward(khmer_ReadAligner_Object * me,
+                                            PyObject * args)
+{
+    ReadAligner * aligner = me->aligner;
+
+    const char * read;
+
+    if (!PyArg_ParseTuple(args, "s", &read)) {
+        return NULL;
+    }
+
+    /*if (strlen(read) < (unsigned int)aligner->ksize()) {
+        PyErr_SetString(PyExc_ValueError,
+                        "string length must >= the hashtable k-mer size");
+        return NULL;
+    }*/
+
+    Alignment * aln;
+    aln = aligner->AlignForward(read);
+
+    const char* alignment = aln->graph_alignment.c_str();
+    const char* readAlignment = aln->read_alignment.c_str();
+    PyObject * x = PyList_New(aln->covs.size());
+    for (size_t i = 0; i < aln->covs.size(); i++) {
+        PyList_SET_ITEM(x, i, PyLong_FromLong(aln->covs[i]));
+    }
+
+    PyObject * ret = Py_BuildValue("dssOO", aln->score, alignment,
+                                   readAlignment,
+                                   (aln->truncated)? Py_True : Py_False,
+                                   x);
+    delete aln;
+    Py_DECREF(x);
+
+    return ret;
+}
+
+static PyObject* khmer_ReadAligner_get_scoring_matrix(
+    khmer_ReadAligner_Object * me, PyObject * args)
+{
+
+    if (!PyArg_ParseTuple(args, "")) {
+        return NULL;
+    }
+    ScoringMatrix matrix = me->aligner->getScoringMatrix();
+
+    return Py_BuildValue( "dddd", matrix.trusted_match, matrix.trusted_mismatch,
+                          matrix.untrusted_match, matrix.untrusted_mismatch);
+}
+
+static PyObject* khmer_ReadAligner_get_transition_probabilities(
+    khmer_ReadAligner_Object * me, PyObject * args)
+{
+
+    if (!PyArg_ParseTuple(args, "")) {
+        return NULL;
+    }
+    ScoringMatrix matrix = me->aligner->getScoringMatrix();
+
+    return Py_BuildValue( "(dddddd)(dddd)(dddd)(dddddd)(dddd)(dddd)",
+                          matrix.tsc[0], matrix.tsc[1], matrix.tsc[2],
+                          matrix.tsc[3], matrix.tsc[4], matrix.tsc[5],
+                          matrix.tsc[6], matrix.tsc[7], matrix.tsc[8],
+                          matrix.tsc[9], matrix.tsc[10], matrix.tsc[11],
+                          matrix.tsc[12], matrix.tsc[13], matrix.tsc[14],
+                          matrix.tsc[15], matrix.tsc[16], matrix.tsc[17],
+                          matrix.tsc[18], matrix.tsc[19], matrix.tsc[20],
+                          matrix.tsc[21], matrix.tsc[22], matrix.tsc[23],
+                          matrix.tsc[24], matrix.tsc[25], matrix.tsc[26],
+                          matrix.tsc[27]);
+}
+
 static PyMethodDef khmer_ReadAligner_methods[] = {
     {"align", (PyCFunction)readaligner_align, METH_VARARGS, ""},
+    {"align_forward", (PyCFunction)readaligner_align_forward, METH_VARARGS, ""},
+    {
+        "get_scoring_matrix", (PyCFunction)khmer_ReadAligner_get_scoring_matrix,
+        METH_VARARGS,
+        "Get the scoring matrix in use.\n\n\
+Returns a tuple of floats: (trusted_match, trusted_mismatch, untrusted_match, \
+untrusted_mismatch)"
+    },
+    {
+        "get_transition_probabilities",
+        (PyCFunction)khmer_ReadAligner_get_transition_probabilities,
+        METH_VARARGS,
+        "Get the transition probabilties in use.\n\n\
+HMM state notation abbreviations:\n\
+    M_t - trusted match; M_u - untrusted match\n\
+    Ir_t - trusted read insert; Ir_u - untrusted read insert\n\
+    Ig_t - trusted graph insert; Ig_u - untrusted graph insert\n\
+\
+Returns a sparse matrix as a tuple of six tuples.\n\
+The inner tuples contain 6, 4, 4, 6, 4, and 4 floats respectively.\n\
+Transitions are notated as 'StartState-NextState':\n\
+(\n\
+  ( M_t-M_t,  M_t-Ir_t,  M_t-Ig_t,  M_t-M_u,  M_t-Ir_u,  M_t-Ig_u),\n\
+  (Ir_t-M_t, Ir_t-Ir_t,            Ir_t-M_u, Ir_t-Ir_u           ),\n\
+  (Ig_t-M_t,          , Ig_t-Ig_t, Ig_t-M_u,            Ig_t-Ig_u),\n\
+  ( M_u-M_t,  M_u-Ir_t,  M_u-Ig_t,  M_u-M_u,  M_u-Ir_u,  M_u-Ig_u),\n\
+  (Ir_u-M_t, Ir_u-Ir_t,            Ir_u-M_u, Ir_u-Ir_u           ),\n\
+  (Ig_u-M_t,          , Ig_u-Ig_t, Ig_u-M_u,            Ig_u-Ig_u)\n\
+)"
+    },
     {NULL} /* Sentinel */
 };
 
@@ -4123,15 +4330,31 @@ static PyObject* khmer_ReadAligner_new(PyTypeObject *type, PyObject * args,
         khmer_KCountingHash_Object * ch = NULL;
         unsigned short int trusted_cov_cutoff = 2;
         double bits_theta = 1;
-
-        if(!PyArg_ParseTuple(args, "O!Hd", &khmer_KCountingHash_Type, &ch,
-                             &trusted_cov_cutoff, &bits_theta)) {
+        double scoring_matrix[] = { 0, 0, 0, 0 };
+        double * transitions = new double[28];
+
+        if(!PyArg_ParseTuple(
+                    args,
+                    "O!Hd|(dddd)((dddddd)(dddd)(dddd)(dddddd)(dddd)(dddd))",
+                    &khmer_KCountingHash_Type, &ch, &trusted_cov_cutoff,
+                    &bits_theta, &scoring_matrix[0], &scoring_matrix[1],
+                    &scoring_matrix[2], &scoring_matrix[3], &transitions[0],
+                    &transitions[1], &transitions[2], &transitions[3],
+                    &transitions[4], &transitions[5], &transitions[6],
+                    &transitions[7], &transitions[8], &transitions[9],
+                    &transitions[10], &transitions[11], &transitions[12],
+                    &transitions[13], &transitions[14], &transitions[15],
+                    &transitions[16], &transitions[17], &transitions[18],
+                    &transitions[19], &transitions[20], &transitions[21],
+                    &transitions[22], &transitions[23], &transitions[24],
+                    &transitions[25], &transitions[26], &transitions[27])) {
             Py_DECREF(self);
             return NULL;
         }
 
         self->aligner = new ReadAligner(ch->counting, trusted_cov_cutoff,
-                                        bits_theta);
+                                        bits_theta, scoring_matrix,
+                                        transitions);
     }
 
     return (PyObject *) self;
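
Tying the ReadAligner additions together (optional scoring and transition
parameters at construction, align_forward, and the two getters), a hedged
sketch; the table parameters and read sequence are illustrative:

    import khmer

    cg = khmer.CountingHash(20, 1e6, 4)
    aligner = khmer.ReadAligner(cg, 2, 1.0)  # trusted cutoff, bits_theta

    score, graph_aln, read_aln, truncated, covs = \
        aligner.align_forward('ATGGCAGTACGGTACGATTGCATTCAGGC')

    matrix = aligner.get_scoring_matrix()                  # 4 floats
    transitions = aligner.get_transition_probabilities()  # 6 inner tuples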
@@ -4157,7 +4380,7 @@ static PyTypeObject khmer_ReadAlignerType = {
     0,                          /*tp_getattro*/
     0,                          /*tp_setattro*/
     0,                          /*tp_as_buffer*/
-    Py_TPFLAGS_DEFAULT,         /*tp_flags*/
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,         /*tp_flags*/
     "ReadAligner object",           /* tp_doc */
     0,                         /* tp_traverse */
     0,                         /* tp_clear */
@@ -4197,9 +4420,16 @@ hashtable_consume_fasta_and_traverse(khmer_KHashtable_Object * me,
 
     CountingHash * counting = counting_o->counting;
 
-    hashtable->consume_fasta_and_traverse(filename, radius, big_threshold,
-                                          transfer_threshold, *counting);
-
+    try {
+        hashtable->consume_fasta_and_traverse(filename, radius, big_threshold,
+                                              transfer_threshold, *counting);
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
@@ -4355,8 +4585,11 @@ static PyObject * hllcounter_consume_fasta(khmer_KHLLCounter_Object * me,
     unsigned int        total_reads   = 0;
     try {
         me->hllcounter->consume_fasta(filename, total_reads, n_consumed);
-    } catch (khmer_file_exception &e) {
-        PyErr_SetString(PyExc_IOError, e.what());
+    } catch (khmer_file_exception &exc) {
+        PyErr_SetString(PyExc_OSError, exc.what());
+        return NULL;
+    } catch (khmer_value_exception &exc) {
+        PyErr_SetString(PyExc_ValueError, exc.what());
         return NULL;
     }
 
@@ -4609,9 +4842,9 @@ static PyObject * forward_hash(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    PyObject * hash;
     try {
-        hash = PyLong_FromUnsignedLongLong(_hash(kmer, ksize));
+        PyObject * hash;
+        hash = PyLong_FromUnsignedLongLong(_hash(kmer, ksize));
         return hash;
     } catch (khmer_exception &e) {
         PyErr_SetString(PyExc_RuntimeError, e.what());
diff --git a/khmer/_version.py b/khmer/_version.py
index 5f61635..5da9804 100644
--- a/khmer/_version.py
+++ b/khmer/_version.py
@@ -16,8 +16,8 @@ import subprocess
 import sys
 
 # these strings will be replaced by git during git-archive
-git_refnames = " (tag: v2.0-rc1)"
-git_full = "bbd38a6d3d0960f71c65dd46ecda3b61584a8b4c"
+git_refnames = " (HEAD -> master, tag: v2.0-rc2)"
+git_full = "8c2f8d33969ad402dac2c9bacbfc02197bd1ce02"
 
 # these strings are filled in when 'setup.py versioneer' creates _version.py
 tag_prefix = "v"
diff --git a/khmer/kfile.py b/khmer/kfile.py
index 9a01f59..a901833 100644
--- a/khmer/kfile.py
+++ b/khmer/kfile.py
@@ -12,7 +12,7 @@ from __future__ import print_function, unicode_literals
 import os
 import sys
 import errno
-from stat import S_ISBLK, S_ISFIFO
+from stat import S_ISBLK, S_ISFIFO, S_ISCHR
 from khmer import khmer_args
 
 
@@ -27,6 +27,7 @@ def check_input_files(file_path, force):
 
     if file_path == '-':
         return
+
     try:
         mode = os.stat(file_path).st_mode
     except OSError:
@@ -34,25 +35,31 @@ def check_input_files(file_path, force):
               file_path, file=sys.stderr)
 
         if not force:
+            print("NOTE: This can be overridden using the --force argument",
+                  file=sys.stderr)
             print("Exiting", file=sys.stderr)
             sys.exit(1)
         else:
             return
 
-    # block devices will be nonzero
-    if S_ISBLK(mode) or S_ISFIFO(mode):
+    # block/character devices and FIFOs (e.g. stdin) need no further checks
+    if S_ISBLK(mode) or S_ISFIFO(mode) or S_ISCHR(mode):
         return
 
     if not os.path.exists(file_path):
         print("ERROR: Input file %s does not exist; exiting" %
               file_path, file=sys.stderr)
         if not force:
+            print("NOTE: This can be overridden using the --force argument",
+                  file=sys.stderr)
             sys.exit(1)
     else:
         if os.stat(file_path).st_size == 0:
             print("ERROR: Input file %s is empty; exiting." %
                   file_path, file=sys.stderr)
             if not force:
+                print("NOTE: This can be overridden using the --force"
+                      " argument", file=sys.stderr)
                 sys.exit(1)
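
A hedged sketch of the force semantics above (file name illustrative): with
force=False a missing or empty input prints the NOTE and exits, while with
force=True the check only warns and returns:

    from khmer.kfile import check_input_files

    check_input_files('reads.fa', force=True)  # warn-and-continue mode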
 
 
@@ -109,17 +116,18 @@ def check_space(in_files, force, _testhook_free_space=None):
         print("       Free space: %.1f GB"
               % (float(free_space) / 1e9,), file=sys.stderr)
         if not force:
+            print("NOTE: This can be overridden using the --force argument",
+                  file=sys.stderr)
             sys.exit(1)
 
 
-def check_space_for_hashtable(args, hashtype, force,
+def check_space_for_hashtable(outfile_name, hash_size, force,
                               _testhook_free_space=None):
-    """Check we have enough size to write a hash table."""
-    hash_size = khmer_args._calculate_tablesize(args, hashtype)
+    """Check that we have enough size to write the specified hash table."""
 
-    cwd = os.getcwd()
-    dir_path = os.path.dirname(os.path.realpath(cwd))
+    dir_path = os.path.dirname(os.path.realpath(outfile_name))
     target = os.statvfs(dir_path)
+
     if _testhook_free_space is None:
         free_space = target.f_frsize * target.f_bavail
     else:
@@ -129,13 +137,15 @@ def check_space_for_hashtable(args, hashtype, force,
     if size_diff > 0:
         print("ERROR: Not enough free space on disk "
               "for saved table files;"
-              "       Need at least %s GB more."
+              "       Need at least %.1f GB more."
               % (float(size_diff) / 1e9,), file=sys.stderr)
         print("       Table size: %.1f GB"
               % (float(hash_size) / 1e9,), file=sys.stderr)
         print("       Free space: %.1f GB"
               % (float(free_space) / 1e9,), file=sys.stderr)
         if not force:
+            print("NOTE: This can be overridden using the --force argument",
+                  file=sys.stderr)
             sys.exit(1)
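
With the new signature, callers pass the output path and a precomputed table
size directly; a hedged sketch where the path and size are placeholders:

    from khmer.kfile import check_space_for_hashtable

    # 4e9 stands in for a size from khmer_args.calculate_tablesize()
    check_space_for_hashtable('out.ct', 4e9, force=False)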
 
 
@@ -148,8 +158,11 @@ def check_valid_file_exists(in_files):
     or non-existent.
     """
     for in_file in in_files:
-        if os.path.exists(in_file):
-            if os.stat(in_file).st_size > 0:
+        if in_file == '-':
+            pass
+        elif os.path.exists(in_file):
+            mode = os.stat(in_file).st_mode
+            if os.stat(in_file).st_size > 0 or S_ISBLK(mode) or S_ISFIFO(mode):
                 return
             else:
                 print('WARNING: Input file %s is empty' %
diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py
index a9e9358..946a645 100644
--- a/khmer/khmer_args.py
+++ b/khmer/khmer_args.py
@@ -60,6 +60,9 @@ def build_hash_args(descr=None, epilog=None, parser=None):
     parser.add_argument('--n_tables', '-N', type=int,
                         default=DEFAULT_N_TABLES,
                         help='number of k-mer counting tables to use')
+    parser.add_argument('-U', '--unique-kmers', type=int, default=0,
+                        help='approximate number of unique kmers in the input'
+                             ' set')
 
     group = parser.add_mutually_exclusive_group()
     group.add_argument('--max-tablesize', '-x', type=float,
@@ -83,7 +86,6 @@ def build_counting_args(descr=None, epilog=None):
 
 def build_hashbits_args(descr=None, epilog=None, parser=None):
     """Build an ArgumentParser with args for hashbits based scripts."""
-
     parser = build_hash_args(descr=descr, epilog=epilog, parser=parser)
     parser.hashtype = 'nodegraph'
 
@@ -130,9 +132,9 @@ def add_loadhash_args(parser):
                         action=LoadAction)
 
 
-def _calculate_tablesize(args, hashtype, multiplier=1.0):
+def calculate_tablesize(args, hashtype, multiplier=1.0):
     if hashtype not in ('countgraph', 'nodegraph'):
-        raise Exception("unknown graph type: %s" % (hashtype,))
+        raise ValueError("unknown graph type: %s" % (hashtype,))
 
     if args.max_memory_usage:
         if hashtype == 'countgraph':
@@ -154,7 +156,7 @@ def create_nodegraph(args, ksize=None, multiplier=1.0):
         print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
         sys.exit(1)
 
-    tablesize = _calculate_tablesize(args, 'nodegraph', multiplier=multiplier)
+    tablesize = calculate_tablesize(args, 'nodegraph', multiplier)
     return khmer.Hashbits(ksize, tablesize, args.n_tables)
 
 
@@ -165,7 +167,7 @@ def create_countgraph(args, ksize=None, multiplier=1.0):
         print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
         sys.exit(1)
 
-    tablesize = _calculate_tablesize(args, 'countgraph', multiplier=multiplier)
+    tablesize = calculate_tablesize(args, 'countgraph', multiplier=multiplier)
     return khmer.CountingHash(ksize, tablesize, args.n_tables)
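
A hedged sketch tying the khmer_args changes together (-U/--unique-kmers,
the now-public calculate_tablesize, and countgraph creation); the argument
values are illustrative:

    from khmer import khmer_args

    parser = khmer_args.build_counting_args('demo')
    args = parser.parse_args(['-U', '50000000', '-x', '1e8'])

    size = khmer_args.calculate_tablesize(args, 'countgraph')
    graph = khmer_args.create_countgraph(args)
    khmer_args.calculate_tablesize(args, 'foograph')  # raises ValueError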
 
 
@@ -177,12 +179,12 @@ def report_on_config(args, hashtype='countgraph'):
     """
     from khmer.utils import print_error
     if hashtype not in ('countgraph', 'nodegraph'):
-        raise Exception("unknown graph type: %s" % (hashtype,))
+        raise ValueError("unknown graph type: %s" % (hashtype,))
 
     if args.quiet:
         return
 
-    tablesize = _calculate_tablesize(args, hashtype)
+    tablesize = calculate_tablesize(args, hashtype)
 
     print_error("\nPARAMETERS:")
     print_error(" - kmer size =    {0} \t\t(-k)".format(args.ksize))
@@ -224,7 +226,8 @@ _algorithms = {
     'software': 'MR Crusoe et al., '
     '2014. http://dx.doi.org/10.6084/m9.figshare.979190',
     'diginorm': 'CT Brown et al., arXiv:1203.4802 [q-bio.GN]',
-    'streaming': 'Q Zhang, S Awad, CT Brown, unpublished',
+    'streaming': 'Q Zhang, S Awad, CT Brown, '
+    'https://dx.doi.org/10.7287/peerj.preprints.890v1',
     'graph': 'J Pell et al., http://dx.doi.org/10.1073/pnas.1121464109',
     'counting': 'Q Zhang et al., '
     'http://dx.doi.org/10.1371/journal.pone.0101271',
diff --git a/khmer/thread_utils.py b/khmer/thread_utils.py
index 41c3914..df997b3 100644
--- a/khmer/thread_utils.py
+++ b/khmer/thread_utils.py
@@ -26,7 +26,7 @@ DEFAULT_GROUPSIZE = 100
 
 def verbose_loader(filename):
     """Screed iterator that additionally prints progress info to stderr."""
-    screed_iter = screed.open(filename, parse_description=False)
+    screed_iter = screed.open(filename)
     for n, record in enumerate(screed_iter):
         if n % 100000 == 0:
             print('... filtering', n, file=sys.stderr)
diff --git a/khmer/utils.py b/khmer/utils.py
index 0e1d5e1..3abb11e 100644
--- a/khmer/utils.py
+++ b/khmer/utils.py
@@ -34,6 +34,9 @@ def check_is_pair(record1, record2):
 
     Handles both Casava formats: seq/1 and seq/2, and 'seq::... 1::...'
     and 'seq::... 2::...'.
+
+    Also handles the default format of the SRA toolkit's fastq-dump:
+    'Accession seq/1'
     """
     if hasattr(record1, 'quality') or hasattr(record2, 'quality'):
         if not (hasattr(record1, 'quality') and hasattr(record2, 'quality')):
@@ -47,8 +50,7 @@ def check_is_pair(record1, record2):
         subpart1 = lhs1.split('/', 1)[0]
         subpart2 = lhs2.split('/', 1)[0]
 
-        assert subpart1
-        if subpart1 == subpart2:
+        if subpart1 and subpart1 == subpart2:
             return True
 
     # handle '@name 1:rst'
@@ -57,7 +59,11 @@ def check_is_pair(record1, record2):
 
     # handle @name seq/1
     elif lhs1 == lhs2 and rhs1.endswith('/1') and rhs2.endswith('/2'):
-        return True
+        subpart1 = rhs1.split('/', 1)[0]
+        subpart2 = rhs2.split('/', 1)[0]
+
+        if subpart1 and subpart1 == subpart2:
+            return True
 
     return False
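
A hedged sketch of the tightened pairing check, using simple stand-ins for
screed records (accession names are illustrative):

    from collections import namedtuple
    from khmer.utils import check_is_pair

    Record = namedtuple('Record', ['name', 'sequence'])

    r1 = Record('SRR1304364.1 1/1', 'GATTACA')
    r2 = Record('SRR1304364.1 1/2', 'TGTAATC')
    r3 = Record('SRR1304364.2 2/2', 'TGTAATC')

    check_is_pair(r1, r2)  # True: same name, reads 1 and 2
    check_is_pair(r1, r3)  # False: read names differ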
 
diff --git a/lib/Makefile b/lib/Makefile
index f3518ea..2bd68d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,3 +1,8 @@
+# Should we use the standard system zlib and libbz2?
+
+USE_SYSTEM_ZLIB ?= false
+USE_SYSTEM_LIBBZ2 ?= false
+
 # Profile?
 # Set this variable to true if you wish to profile the codes.
 WANT_PROFILING=false
@@ -23,53 +28,61 @@ WANT_EXTRA_SANITY_CHECKING=false
 # when optimization is turned on).
 WANT_DEBUGGING=false
 
-# Compile with performance metrics turned on?
-# Set this variable to true if you want to use instrumentation provided
-# in the sources for performance measurement purposes
-# and are willing to accept the overhead such instrumentation introduces.
-WITH_INTERNAL_METRICS=false
-
-
 PREFIX=/usr/local
 
 ### NOTE: No user-serviceable parts below this line! ###
 
 INCLUDES= -I ../third-party/seqan/core/include/ 	\
-	  -I ../third-party/zlib/ 			\
-	  -I ../third-party/bzip2/ 			\
 	  -I ../third-party/smhasher/
 
-CXXFLAGS=$(INCLUDES)
-CXX_WARNING_FLAGS=-Wall
-CXX_OPTIMIZATION_FLAGS=-O3
-CXX_SHARED_LIB_FLAGS=-fPIC
-CXXFLAGS+= 				\
-	   $(CXX_WARNING_FLAGS) 	\
-	   $(CXX_OPTIMIZATION_FLAGS)	\
-	   $(CXX_SHARED_LIB_FLAGS)
+ifeq ($(USE_SYSTEM_ZLIB), false)
+INCLUDES += -I ../third-party/zlib/
+endif
+
+ifeq ($(USE_SYSTEM_LIBBZ2), false)
+INCLUDES += -I ../third-party/bzip2/
+endif
+
+# Warnings in common to C and C++
+WARNINGS=-Wall
+
+# Flags in common to C and C++
+COMMON_FLAGS=-O3 -fPIC
+SEQAN_FLAGS=-DSEQAN_HAS_ZLIB=1 -DSEQAN_HAS_BZIP2=1
 
-CFLAGS=$(INCLUDES)
-C_WARNING_FLAGS=-Wall
-C_OPTIMIZATION_FLAGS=-O3
-C_SHARED_LIB_FLAGS=-fPIC
-CFLAGS+= $(C_WARNING_FLAGS) $(C_OPTIMIZATION_FLAGS) $(C_SHARED_LIB_FLAGS)
+# Base C/CXXFLAGS
+CPPFLAGS ?=
+CPPFLAGS += $(SEQAN_FLAGS)
 
-LIBS=
+CXXFLAGS ?=
+CXXFLAGS += $(COMMON_FLAGS) $(WARNINGS)
+CXXFLAGS += -Wstrict-null-sentinel
+CXXFLAGS += $(INCLUDES) $(CPPFLAGS)
+
+CFLAGS	 ?=
+CFLAGS   += $(COMMON_FLAGS) $(WARNINGS)
+CFLAGS   += -Wshadow -Wcast-align -Wstrict-prototypes
+CFLAGS   += $(INCLUDES) $(CPPFLAGS)
+
+LDFLAGS  ?=
+ifneq ($(USE_SYSTEM_ZLIB), false)
+LDFLAGS  += -lz
+endif
+
+ifneq ($(USE_SYSTEM_LIBBZ2), false)
+LDFLAGS  += -lbz2
+endif
 
 ifeq ($(WANT_DEBUGGING), true)
-CXX_DEBUG_FLAGS=-g
-CXXFLAGS+= $(CXX_DEBUG_FLAGS)
-CFLAGS+= $(CXX_DEBUG_FLAGS)
-else
-CXX_DEBUG_FLAGS=
+DEBUG_FLAGS=-g
+CXXFLAGS += $(DEBUG_FLAGS)
+CFLAGS   += $(DEBUG_FLAGS)
 endif
 
 ifeq ($(WANT_EXTRA_SANITY_CHECKING), true)
 DEFINE_KHMER_EXTRA_SANITY_CHECKS=-DKHMER_EXTRA_SANITY_CHECKS
-CXXFLAGS+= $(DEFINE_KHMER_EXTRA_SANITY_CHECKS)
-CFLAGS+= $(DEFINE_KHMER_EXTRA_SANITY_CHECKS)
-else
-DEFINE_KHMER_EXTRA_SANITY_CHECKS=
+CXXFLAGS += $(DEFINE_KHMER_EXTRA_SANITY_CHECKS)
+CFLAGS   += $(DEFINE_KHMER_EXTRA_SANITY_CHECKS)
 endif
 
 ifeq ($(WANT_PROFILING), true)
@@ -77,19 +90,15 @@ ifeq ($(PROFILER_OF_CHOICE), TAU)
 CXX=tau_cxx.sh
 endif
 ifeq ($(PROFILER_OF_CHOICE), gprof)
-PROFILING_LIBS=-pg
-CXXFLAGS+= -pg
-LIBS+= $(PROFILING_LIBS)
+CXXFLAGS += -pg
+CFLAGS   += -pg
+LDFLAGS  += -pg
 endif
 endif
 
-ifeq ($(WITH_INTERNAL_METRICS), true)
-CXXFLAGS+= -DWITH_INTERNAL_METRICS
-endif
-
 # Place POSIX threads last in linking order, if needed.
 ifneq ($(shell uname), Linux)
-LIBS+= -pthread
+LDFLAGS  += -pthread
 endif
 
 
@@ -99,47 +108,52 @@ HAVE_OPENMP=$(shell 						\
 	      rm -f chkomp)
 
 ifeq ($(HAVE_OPENMP), true)
-	CFLAGS += -fopenmp
-	CXXFLAGS += -fopenmp
+CXXFLAGS +=-fopenmp
+CFLAGS   +=-fopenmp
 endif
 
-VERSION = $(shell python get_version.py)
+ifneq ($(PACKAGE_VERSION),)
+VERSION = $(PACKAGE_VERSION)
+else
+VERSION = $(shell ./get_version.py)
+endif
+
+MAJOR_VERSION = $(shell echo $(VERSION) | sed -e 's/^\([^-\.]*\)\.\([^-\.]*\).*/\1/')
+MINOR_VERSION = $(shell echo $(VERSION) | sed -e 's/^\([^-\.]*\)\.\([^-\.]*\).*/\2/')
 
-LIBVERSION = $(shell python get_version.py | sed -e 's/^\([^-]*\)-.*/\1/')
-LIBKHMERSO=libkhmer.so.$(LIBVERSION)
+LIB_VERSION = $(MAJOR_VERSION).$(MINOR_VERSION)
 
-CXXFLAGS+= -DVERSION=$(VERSION)
+ifeq ($(shell uname), Darwin)
+SHARED_EXT   = dylib
+SONAME       = liboxli.$(SHARED_EXT).$(MAJOR_VERSION)
+SONAME_FLAGS = -install_name $(PREFIX)/lib/$(SONAME) -compatibility_version $(MAJOR_VERSION) -current_version $(LIB_VERSION)
+else
+SHARED_EXT   = so
+SONAME       = liboxli.$(SHARED_EXT).$(MAJOR_VERSION)
+SONAME_FLAGS = -Wl,-soname=$(SONAME)
+endif
+
+# The ABI version of liboxli
+LIBVERSION = 1
+LIBKHMERSO=liboxli.$(SHARED_EXT).$(LIB_VERSION)
+
+CXXFLAGS += -DVERSION=$(VERSION)
 
 NO_UNIQUE_RC=0
-CXXFLAGS+= -DNO_UNIQUE_RC=$(NO_UNIQUE_RC)
+CXXFLAGS += -DNO_UNIQUE_RC=$(NO_UNIQUE_RC)
+CFLAGS   += -DNO_UNIQUE_RC=$(NO_UNIQUE_RC)
 
 export CXX
 export CFLAGS
 export CXXFLAGS
-export LIBS
+export LDFLAGS
 export VERSION
 
 
 #### Third party dependencies ####
-# ZLIB
+# ZLIB, use .lo not .o, so we get -fPIC and other library-related flags
 ZLIB_DIR=../third-party/zlib
 ZLIB_OBJS_BASE=\
-	adler32.o \
-	crc32.o \
-	deflate.o \
-	infback.o \
-	inffast.o \
-	inflate.o \
-	inftrees.o \
-	trees.o \
-	zutil.o \
-	compress.o \
-	uncompr.o \
-	gzclose.o \
-	gzlib.o \
-	gzread.o \
-	gzwrite.o
-ZLIB_PIC_OBJS_BASE=\
 	adler32.lo \
 	crc32.lo \
 	deflate.lo \
@@ -157,7 +171,6 @@ ZLIB_PIC_OBJS_BASE=\
 	gzwrite.lo
 
 ZLIB_OBJS=$(addprefix $(ZLIB_DIR)/, $(ZLIB_OBJS_BASE))
-ZLIB_PIC_OBJS=$(addprefix $(ZLIB_DIR)/, $(ZLIB_PIC_OBJS_BASE))
 
 # BZ2
 BZIP2_DIR=../third-party/bzip2
@@ -173,7 +186,7 @@ BZIP2_OBJS_BASE= \
 BZIP2_OBJS=$(addprefix $(BZIP2_DIR)/, $(BZIP2_OBJS_BASE))
 
 
-#### khmer proper below here ####
+#### oxli proper below here ####
 
 LIBKHMER_OBJS= \
 	counting.o \
@@ -182,14 +195,25 @@ LIBKHMER_OBJS= \
 	hllcounter.o \
 	kmer_hash.o \
 	labelhash.o \
-	perf_metrics.o \
 	read_aligner.o \
 	read_parsers.o \
 	subset.o \
-	trace_logger.o \
-	murmur3.o \
-	$(BZIP2_OBJS) \
-	$(ZLIB_PIC_OBJS)
+	murmur3.o
+
+PRECOMPILE_OBJS ?=
+PRECLEAN_TARGS ?=
+
+ifeq ($(USE_SYSTEM_ZLIB), false)
+LIBKHMER_OBJS  += $(ZLIB_OBJS)
+PRECOMPILE_OBJS += $(ZLIB_OBJS)
+PRECLEAN_TARGS += zlibclean
+endif
+
+ifeq ($(USE_SYSTEM_LIBBZ2), false)
+LIBKHMER_OBJS  += $(BZIP2_OBJS)
+PRECOMPILE_OBJS += $(BZIP2_OBJS)
+PRECLEAN_TARGS += libbz2clean
+endif
 
 KHMER_HEADERS= \
 	counting.hh \
@@ -199,45 +223,40 @@ KHMER_HEADERS= \
 	khmer.hh \
 	kmer_hash.hh \
 	labelhash.hh \
-	perf_metrics.hh \
 	primes.hh \
 	read_aligner.hh \
 	read_parsers.hh \
 	subset.hh \
-	trace_logger.hh
-
-TEST_PROGS = test-Colors test-read-aligner test-compile
 
 # START OF RULES #
 
 # The all rule comes first!
-all: $(LIBKHMERSO) libkhmer.a khmer.pc
+all: $(LIBKHMERSO) liboxli.a oxli.pc
 
-clean:
-	rm -f *.o *.a *.so* khmer.pc $(TEST_PROGS)
+zlibclean:
 	(cd $(ZLIB_DIR) && make distclean)
+libbz2clean:
 	(cd $(BZIP2_DIR) && make -f Makefile-libbz2_so clean)
 
-test: $(TEST_PROGS)
+clean: $(PRECLEAN_TARGS)
+	rm -f *.o *.a *.$(SHARED_EXT)* oxli.pc $(TEST_PROGS)
 
-install: $(LIBKHMERSO) libkhmer.a khmer.pc $(KHMER_HEADERS)
-	mkdir -p $(PREFIX)/lib $(PREFIX)/lib/pkgconfig $(PREFIX)/include/
-	cp -r $(KHMER_HEADERS) \
-		../third-party/smhasher/MurmurHash3.h \
-		$(PREFIX)/include/
-	cp khmer.pc $(PREFIX)/lib/pkgconfig/
-	cp $(LIBKHMERSO) libkhmer.a $(PREFIX)/lib
-	ln -sf $(PREFIX)/lib/$(LIBKHMERSO) $(PREFIX)/lib/libkhmer.so
+install: $(LIBKHMERSO) liboxli.a oxli.pc $(KHMER_HEADERS)
+	mkdir -p $(PREFIX)/lib $(PREFIX)/lib/pkgconfig $(PREFIX)/include/oxli
+	cp -r $(KHMER_HEADERS) 				\
+		../third-party/smhasher/MurmurHash3.h 	\
+		$(PREFIX)/include/oxli/
+	cp oxli.pc $(PREFIX)/lib/pkgconfig/
+	cp $(LIBKHMERSO) liboxli.a $(PREFIX)/lib
+	ln -sf $(PREFIX)/lib/$(LIBKHMERSO) $(PREFIX)/lib/$(SONAME)
+	ln -sf $(PREFIX)/lib/$(SONAME) $(PREFIX)/lib/liboxli.$(SHARED_EXT)
 
-khmer.pc: khmer.pc.in
+oxli.pc: oxli.pc.in
 	sed -e 's,@prefix@,$(PREFIX),'  -e 's,@VERSION@,$(VERSION),' $< >$@
 
 $(ZLIB_OBJS):
 	(cd $(ZLIB_DIR) && ./configure && make $(ZLIB_OBJS_BASE))
 
-$(ZLIB_PIC_OBJS):
-	(cd $(ZLIB_DIR) && ./configure && make $(ZLIB_PIC_OBJS_BASE))
-
 $(BZIP2_OBJS):
 	(cd $(BZIP2_DIR) && make -f Makefile-libbz2_so $(BZIP2_OBJS_BASE))
 
@@ -245,17 +264,14 @@ $(BZIP2_OBJS):
 murmur3.o: ../third-party/smhasher/MurmurHash3.cc
 	$(CXX) $(CXXFLAGS) -c -o $@ $<
 
-%.o: %.cc $(ZLIB_OBJS) $(ZLIB_PIC_OBJS) $(BZIP2_OBJS) $(KHMER_HEADERS)
-	$(CXX) $(CXXFLAGS) -c -o $@ $<
+%.o: %.cc $(PRECOMPILE_OBJS) $(KHMER_HEADERS)
+	$(CXX) $(CXXFLAGS) -c -o $@ $<
 
 $(LIBKHMERSO): $(LIBKHMER_OBJS)
-	$(CXX) $(CXXFLAGS) -shared -o $@ $(LIBKHMER_OBJS)
-	ln -sf $(LIBKHMERSO) libkhmer.so
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) $(SONAME_FLAGS) -shared -o $@ $^
+	ln -sf $(LIBKHMERSO) $(SONAME)
+	ln -sf $(SONAME) liboxli.$(SHARED_EXT)
 
-libkhmer.a: $(LIBKHMER_OBJS)
-	ar rcs $@ $(LIBKHMER_OBJS)
+liboxli.a: $(LIBKHMER_OBJS)
+	ar rcs $@ $^
 	ranlib $@
-
-# catch-all rule for test drivers
-test-%: test-%.cc libkhmer.a
-	$(CXX) $(CXXFLAGS) -I . -o $@ $< libkhmer.a
diff --git a/lib/counting.cc b/lib/counting.cc
index 27b23ad..2b9b921 100644
--- a/lib/counting.cc
+++ b/lib/counting.cc
@@ -5,16 +5,17 @@
 // Contact: khmer-project@idyll.org
 //
 
-#include "hashtable.hh"
+#include <errno.h>
+#include <algorithm>
+#include <iostream>
+#include <sstream> // IWYU pragma: keep
+
 #include "counting.hh"
 #include "hashbits.hh"
+#include "hashtable.hh"
+#include "khmer_exception.hh"
 #include "read_parsers.hh"
-
 #include "zlib.h"
-#include <math.h>
-#include <algorithm>
-#include <sstream>
-#include <errno.h>
 
 using namespace std;
 using namespace khmer;
@@ -35,7 +36,11 @@ void CountingHash::output_fasta_kmer_pos_freq(
     Read read;
 
     while(!parser->is_complete()) {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
         seq = read.sequence;
 
         long numPos = seq.length() - _ksize + 1;
@@ -119,31 +124,31 @@ CountingHash::abundance_distribution(
         throw khmer_exception();
     }
 
-    try {
-        while(!parser->is_complete()) {
+    while(!parser->is_complete()) {
+        try {
             read = parser->get_next_read();
-            seq = read.sequence;
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
+        seq = read.sequence;
 
-            if (check_and_normalize_read(seq)) {
-                KMerIterator kmers(seq.c_str(), _ksize);
+        if (check_and_normalize_read(seq)) {
+            KMerIterator kmers(seq.c_str(), _ksize);
 
-                while(!kmers.done()) {
-                    HashIntoType kmer = kmers.next();
+            while(!kmers.done()) {
+                HashIntoType kmer = kmers.next();
 
-                    if (!tracking->get_count(kmer)) {
-                        tracking->count(kmer);
+                if (!tracking->get_count(kmer)) {
+                    tracking->count(kmer);
 
-                        BoundedCounterType n = get_count(kmer);
-                        dist[n]++;
-                    }
+                    BoundedCounterType n = get_count(kmer);
+                    dist[n]++;
                 }
-
-                name.clear();
-                seq.clear();
             }
 
+            name.clear();
+            seq.clear();
         }
-    } catch (NoMoreReadsAvailable) {
     }
     return dist;
 }
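
From Python the restructured loop is invisible; abundance_distribution simply
stops at end-of-stream. A hedged usage sketch, with the file name and table
parameters illustrative:

    import khmer

    cg = khmer.CountingHash(20, 1e6, 4)
    cg.consume_fasta('reads.fa')
    tracking = khmer.Hashbits(20, 1e6, 4)
    dist = cg.abundance_distribution('reads.fa', tracking)
    print(dist[1])  # distinct k-mers seen exactly once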
@@ -180,7 +185,11 @@ HashIntoType * CountingHash::fasta_count_kmers_by_position(
     unsigned long long read_num = 0;
 
     while(!parser->is_complete()) {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
 
         seq = read.sequence;
         bool valid_read = check_and_normalize_read(seq);
@@ -232,7 +241,11 @@ void CountingHash::fasta_dump_kmers_by_abundance(
     unsigned long long read_num = 0;
 
     while(!parser->is_complete()) {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
         bool valid_read = check_and_normalize_read(seq);
         seq = read.sequence;
 
@@ -519,9 +532,11 @@ CountingHashFileReader::CountingHashFileReader(
         infile.read((char *) &ht_type, 1);
         if (!(std::string(signature, 4) == SAVED_SIGNATURE)) {
             std::ostringstream err;
-            err << "Does not start with signature for a khmer " <<
-                "file: " << signature << " Should be: " <<
-                SAVED_SIGNATURE;
+            err << "Does not start with signature for a khmer file: 0x";
+            for(size_t i=0; i < 4; ++i) {
+                err << std::hex << (int) signature[i];
+            }
+            err << " Should be: " << SAVED_SIGNATURE;
             throw khmer_file_exception(err.str());
         } else if (!(version == SAVED_FORMAT_VERSION)) {
             std::ostringstream err;
@@ -898,7 +913,7 @@ CountingHashGzFileWriter::CountingHashGzFileWriter(
                     msg << strerror(errno);
                 }
                 gzclose(outfile);
-                throw khmer_file_exception(msg.str().c_str());
+                throw khmer_file_exception(msg.str());
             }
             written += gz_result;
         }
@@ -944,7 +959,11 @@ void CountingHash::collect_high_abundance_kmers(
 
     bool done = false;
     while(!parser->is_complete() && !done)  {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
         currSeq = read.sequence;
 
         // do we want to process it?
@@ -983,7 +1002,11 @@ void CountingHash::collect_high_abundance_kmers(
 
     total_reads = 0;
     while(!parser->is_complete() && total_reads != stop_at_read)  {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
         currSeq = read.sequence;
 
         // do we want to process it?
diff --git a/lib/counting.hh b/lib/counting.hh
index 4849870..ac8b70e 100644
--- a/lib/counting.hh
+++ b/lib/counting.hh
@@ -8,19 +8,38 @@
 #ifndef COUNTING_HH
 #define COUNTING_HH
 
-#include "hashtable.hh"
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <map>
+#include <string>
+#include <utility>
 #include <vector>
 
+#include "hashtable.hh"
+#include "khmer.hh"
+#include "kmer_hash.hh"
+
+namespace khmer
+{
+class Hashbits;
+
+namespace read_parsers
+{
+struct IParser;
+}  // namespace read_parsers
+}  // namespace khmer
+
 namespace khmer
 {
 typedef std::map<HashIntoType, BoundedCounterType> KmerCountMap;
 
-class CountingHashIntersect;
 class CountingHashFile;
 class CountingHashFileReader;
 class CountingHashFileWriter;
 class CountingHashGzFileReader;
 class CountingHashGzFileWriter;
+class CountingHashIntersect;
 
 class CountingHash : public khmer::Hashtable
 {
diff --git a/lib/get_version.py b/lib/get_version.py
old mode 100644
new mode 100755
index 5d7fa66..929a1b8
--- a/lib/get_version.py
+++ b/lib/get_version.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 from __future__ import print_function
 import sys
 sys.path.insert(0, '../')
diff --git a/lib/hashbits.cc b/lib/hashbits.cc
index 8119305..f103e02 100644
--- a/lib/hashbits.cc
+++ b/lib/hashbits.cc
@@ -5,14 +5,14 @@
 // Contact: khmer-project at idyll.org
 //
 
-#include <iostream>
-#include "hashtable.hh"
+#include <errno.h>
+#include <sstream> // IWYU pragma: keep
+
 #include "hashbits.hh"
+#include "hashtable.hh"
+#include "khmer_exception.hh"
 #include "read_parsers.hh"
 
-#include <sstream>
-#include <errno.h>
-
 using namespace std;
 using namespace khmer;
 using namespace khmer:: read_parsers;
@@ -99,9 +99,11 @@ void Hashbits::load(std::string infilename)
         infile.read((char *) &ht_type, 1);
         if (!(std::string(signature, 4) == SAVED_SIGNATURE)) {
             std::ostringstream err;
-            err << "Does not start with signature for a khmer " <<
-                "file: " << signature << " Should be: " <<
-                SAVED_SIGNATURE;
+            err << "Does not start with signature for a khmer file: 0x";
+            for(size_t i=0; i < 4; ++i) {
+                err << std::hex << (int) signature[i];
+            }
+            err << " Should be: " << SAVED_SIGNATURE;
             throw khmer_file_exception(err.str());
         } else if (!(version == SAVED_FORMAT_VERSION)) {
             std::ostringstream err;
@@ -187,7 +189,11 @@ void Hashbits::consume_fasta_overlap(const std::string &filename,
 
     IParser* parser = IParser::get_parser(filename.c_str());
     while(!parser->is_complete())  {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
         total_reads++;
     }
 //block size for curve
@@ -217,7 +223,11 @@ void Hashbits::consume_fasta_overlap(const std::string &filename,
     //
 
     while(!parser->is_complete())  {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
         currSeq = read.sequence;
 
         unsigned int this_n_consumed;
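
The signature check rewritten here (and repeated in counting.cc above and
in hashtable.cc, labelhash.cc and subset.cc below) also fixes an unsafe
error message: signature is a char[4] with no NUL terminator, so streaming
it directly could read past the buffer; the bytes are now printed as hex
instead. A self-contained sketch of the same idea -- note the 0xff mask is
an extra precaution not present in the patch itself, stopping sign
extension for bytes >= 0x80 on platforms where char is signed:

    #include <cstddef>
    #include <sstream>
    #include <string>

    // Render a 4-byte file signature as hex for an error message.
    std::string signature_as_hex(const char signature[4])
    {
        std::ostringstream err;
        err << std::hex;
        for (std::size_t i = 0; i < 4; ++i) {
            err << (static_cast<int>(signature[i]) & 0xff);
        }
        return err.str();
    }
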
diff --git a/lib/hashbits.hh b/lib/hashbits.hh
index 3139650..2eb2e7e 100644
--- a/lib/hashbits.hh
+++ b/lib/hashbits.hh
@@ -8,8 +8,14 @@
 #ifndef HASHBITS_HH
 #define HASHBITS_HH
 
+#include <stddef.h>
+#include <string.h>
+#include <string>
 #include <vector>
+
 #include "hashtable.hh"
+#include "khmer.hh"
+#include "kmer_hash.hh"
 
 namespace khmer
 {
diff --git a/lib/hashtable.cc b/lib/hashtable.cc
index cf4c2cb..85d417d 100644
--- a/lib/hashtable.cc
+++ b/lib/hashtable.cc
@@ -5,68 +5,25 @@
 // Contact: khmer-project at idyll.org
 //
 
-#include "khmer.hh"
+#include <errno.h>
+#include <math.h>
+#include <algorithm>
+#include <deque>
+#include <fstream>
+#include <iostream>
+#include <sstream> // IWYU pragma: keep
+#include <queue>
+#include <set>
+
+#include "counting.hh"
 #include "hashtable.hh"
+#include "khmer.hh"
 #include "read_parsers.hh"
-#include "counting.hh"
-
-#include <algorithm>
-#include <sstream>
-#include <errno.h>
 
 using namespace std;
 using namespace khmer;
 using namespace khmer:: read_parsers;
 
-#ifdef WITH_INTERNAL_METRICS
-HashTablePerformanceMetrics::
-HashTablePerformanceMetrics( )
-    : IPerformanceMetrics( ),
-      clock_nsecs_norm_read( 0 ),
-      cpu_nsecs_norm_read( 0 ),
-      clock_nsecs_hash_kmer( 0 ),
-      cpu_nsecs_hash_kmer( 0 ),
-      clock_nsecs_update_tallies( 0 ),
-      cpu_nsecs_update_tallies( 0 )
-{ }
-
-
-HashTablePerformanceMetrics::
-~HashTablePerformanceMetrics( )
-{ }
-
-
-void
-HashTablePerformanceMetrics::
-accumulate_timer_deltas( uint32_t metrics_key )
-{
-
-    switch (metrics_key) {
-    case MKEY_TIME_NORM_READ:
-        clock_nsecs_norm_read +=
-            _timespec_diff_in_nsecs( _temp_clock_start, _temp_clock_stop );
-        cpu_nsecs_norm_read   +=
-            _timespec_diff_in_nsecs( _temp_cpu_start, _temp_cpu_stop );
-        break;
-    case MKEY_TIME_HASH_KMER:
-        clock_nsecs_hash_kmer +=
-            _timespec_diff_in_nsecs( _temp_clock_start, _temp_clock_stop );
-        cpu_nsecs_hash_kmer   +=
-            _timespec_diff_in_nsecs( _temp_cpu_start, _temp_cpu_stop );
-        break;
-    case MKEY_TIME_UPDATE_TALLIES:
-        clock_nsecs_update_tallies +=
-            _timespec_diff_in_nsecs( _temp_clock_start, _temp_clock_stop );
-        cpu_nsecs_update_tallies   +=
-            _timespec_diff_in_nsecs( _temp_cpu_start, _temp_cpu_stop );
-        break;
-    default:
-        throw InvalidPerformanceMetricsKey( );
-    }
-
-}
-#endif
-
 //
 // check_and_process_read: checks for non-ACGT characters before consuming
 //
@@ -146,18 +103,18 @@ consume_fasta(
 
     // Iterate through the reads and consume their k-mers.
     while (!parser->is_complete( )) {
-
+        bool is_valid;
         try {
-            bool is_valid;
             read = parser->get_next_read( );
+        } catch (NoMoreReadsAvailable) {
+            break;
+        }
 
-            unsigned int this_n_consumed =
-                check_and_process_read(read.sequence, is_valid);
+        unsigned int this_n_consumed =
+            check_and_process_read(read.sequence, is_valid);
 
-            __sync_add_and_fetch( &n_consumed, this_n_consumed );
-            __sync_add_and_fetch( &total_reads, 1 );
-        } catch (read_parsers::NoMoreReadsAvailable) {
-        }
+        __sync_add_and_fetch( &n_consumed, this_n_consumed );
+        __sync_add_and_fetch( &total_reads, 1 );
 
     } // while reads left for parser
 
@@ -314,20 +271,23 @@ void Hashtable::load_tagset(std::string infilename, bool clear_tags)
     }
 
     unsigned char version, ht_type;
-    char signature[4];
     unsigned int save_ksize = 0;
 
     size_t tagset_size = 0;
     HashIntoType * buf = NULL;
 
     try {
+        char signature[4];
         infile.read(signature, 4);
         infile.read((char *) &version, 1);
         infile.read((char *) &ht_type, 1);
         if (!(std::string(signature, 4) == SAVED_SIGNATURE)) {
             std::ostringstream err;
-            err << "Incorrect file signature " << signature
-                << " while reading tagset from " << infilename
+            err << "Incorrect file signature 0x";
+            for(size_t i=0; i < 4; ++i) {
+                err << std::hex << (int) signature[i];
+            }
+            err << " while reading tagset from " << infilename
                 << "; should be " << SAVED_SIGNATURE;
             throw khmer_file_exception(err.str());
         } else if (!(version == SAVED_FORMAT_VERSION)) {
@@ -485,7 +445,12 @@ consume_fasta_and_tag(
     // Iterate through the reads and consume their k-mers.
     while (!parser->is_complete( )) {
 
-        read = parser->get_next_read( );
+        try {
+            read = parser->get_next_read( );
+        } catch (NoMoreReadsAvailable &e) {
+            // No more reads available: bail out of the loop
+            break;
+        }
 
         if (check_and_normalize_read( read.sequence )) {
             unsigned long long this_n_consumed = 0;
@@ -523,7 +488,11 @@ void Hashtable::consume_fasta_and_tag_with_stoptags(const std::string &filename,
     //
 
     while(!parser->is_complete())  {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
         seq = read.sequence;
 
         read_tags.clear();
@@ -644,7 +613,11 @@ void Hashtable::consume_partitioned_fasta(const std::string &filename,
     //
 
     while(!parser->is_complete())  {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
         seq = read.sequence;
 
         if (check_and_normalize_read(seq)) {
@@ -691,7 +664,11 @@ void Hashtable::consume_fasta_and_traverse(const std::string &filename,
     //
 
     while(!parser->is_complete())  {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
         seq = read.sequence;
 
         if (check_and_normalize_read(seq)) {	// process?
@@ -897,7 +874,11 @@ void Hashtable::filter_if_present(const std::string &infilename,
     HashIntoType kmer;
 
     while(!parser->is_complete()) {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
         seq = read.sequence;
 
         if (check_and_normalize_read(seq)) {
@@ -1296,19 +1277,22 @@ void Hashtable::load_stop_tags(std::string infilename, bool clear_tags)
     }
 
     unsigned char version, ht_type;
-    char signature[4];
     unsigned int save_ksize = 0;
 
     size_t tagset_size = 0;
 
     try {
+        char signature[4];
         infile.read(signature, 4);
         infile.read((char *) &version, 1);
         infile.read((char *) &ht_type, 1);
         if (!(std::string(signature, 4) == SAVED_SIGNATURE)) {
             std::ostringstream err;
-            err << "Incorrect file signature " << signature
-                << " while reading stoptags from " << infilename
+            err << "Incorrect file signature 0x";
+            for(size_t i=0; i < 4; ++i) {
+                err << std::hex << (int) signature[i];
+            }
+            err << " while reading stoptags from " << infilename
                 << "; should be " << SAVED_SIGNATURE;
             throw khmer_file_exception(err.str());
         } else if (!(version == SAVED_FORMAT_VERSION)) {
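
Besides the per-loop EOF handling, consume_fasta() was restructured so that
only get_next_read() sits inside the try block and end-of-stream breaks out
of the loop immediately, replacing the old empty catch that swallowed the
exception and spun back to is_complete(). The tallies stay lock-free via
the GCC __sync builtins, which is what lets several threads drive one
Hashtable through a shared parser. A sketch of that tally step:

    // Lock-free tallies as in consume_fasta(): the GCC __sync builtins
    // make the increments atomic, so concurrent callers sharing one
    // parser can update the totals without a mutex.
    unsigned int total_reads = 0;
    unsigned long long n_consumed = 0;

    void tally(unsigned int this_n_consumed)
    {
        __sync_add_and_fetch(&n_consumed, this_n_consumed);
        __sync_add_and_fetch(&total_reads, 1);
    }
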
diff --git a/lib/hashtable.hh b/lib/hashtable.hh
index d18be70..f0f5135 100644
--- a/lib/hashtable.hh
+++ b/lib/hashtable.hh
@@ -9,22 +9,35 @@
 #define HASHTABLE_HH
 
 
-#include <vector>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <fstream>
 #include <iostream>
 #include <list>
-#include <queue>
-
-#include <fstream>
-#include <string>
-#include <set>
 #include <map>
 #include <queue>
+#include <queue>
+#include <set>
+#include <string>
+#include <vector>
 
 #include "khmer.hh"
 #include "khmer_exception.hh"
+#include "kmer_hash.hh"
 #include "read_parsers.hh"
 #include "subset.hh"
-#include "kmer_hash.hh"
+
+namespace khmer
+{
+class CountingHash;
+class Hashtable;
+
+namespace read_parsers
+{
+struct IParser;
+}  // namespace read_parsers
+}  // namespace khmer
 
 #define MAX_KEEPER_SIZE int(1e6)
 
@@ -40,29 +53,6 @@
 
 namespace khmer
 {
-#ifdef WITH_INTERNAL_METRICS
-struct HashTablePerformanceMetrics : public IPerformanceMetrics {
-
-    enum {
-        MKEY_TIME_NORM_READ,
-        MKEY_TIME_HASH_KMER,
-        MKEY_TIME_UPDATE_TALLIES
-    };
-
-    uint64_t	clock_nsecs_norm_read;
-    uint64_t	cpu_nsecs_norm_read;
-    uint64_t	clock_nsecs_hash_kmer;
-    uint64_t	cpu_nsecs_hash_kmer;
-    uint64_t	clock_nsecs_update_tallies;
-    uint64_t	cpu_nsecs_update_tallies;
-
-    HashTablePerformanceMetrics( );
-    virtual ~HashTablePerformanceMetrics( );
-
-    virtual void	accumulate_timer_deltas( uint32_t metrics_key );
-
-};
-#endif
 
 //
 // Sequence iterator class, test.  Not really a C++ iterator yet.
@@ -185,7 +175,7 @@ protected:
     HashIntoType    bitmask;
     unsigned int    _nbits_sub_1;
 
-    Hashtable( WordLength ksize )
+    explicit Hashtable( WordLength ksize )
         : _max_count( MAX_KCOUNT ),
           _max_bigcount( MAX_BIGCOUNT ),
           _ksize( ksize )
@@ -241,7 +231,8 @@ protected:
 
     uint32_t _all_tags_spin_lock;
 
-    NONCOPYABLE(Hashtable);
+    explicit Hashtable(const Hashtable&);
+    Hashtable& operator=(const Hashtable&);
 
 public:
     SubsetPartition * partition;
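
The NONCOPYABLE(Hashtable) macro is expanded by hand into the classic
pre-C++11 idiom: declare the copy constructor and copy assignment in a
non-public section and never define them, so accidental copies fail to
compile (or, from inside the class, to link). A standalone sketch; C++11
code would spell the same intent with = delete:

    // Pre-C++11 noncopyable idiom, as written out in Hashtable above.
    class NonCopyable
    {
    protected:
        NonCopyable() {}
    private:
        NonCopyable(const NonCopyable&);             // declared, never defined
        NonCopyable& operator=(const NonCopyable&);  // declared, never defined
    };
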
diff --git a/lib/hllcounter.cc b/lib/hllcounter.cc
index 91ac9f7..c115524 100644
--- a/lib/hllcounter.cc
+++ b/lib/hllcounter.cc
@@ -5,18 +5,18 @@
 // Contact: khmer-project at idyll.org
 //
 
-#include "hllcounter.hh"
-
 #include <math.h>
+#include <stdlib.h>
 #include <algorithm>
+#include <map>
 #include <numeric>
-#include <inttypes.h>
-#include <sstream>
+#include <utility>
 
+#include "hllcounter.hh"
 #include "khmer.hh"
+#include "khmer_exception.hh"
 #include "kmer_hash.hh"
 #include "read_parsers.hh"
-#include "khmer_exception.hh"
 
 #ifdef _OPENMP
 #include <omp.h>
@@ -392,20 +392,21 @@ void HLLCounter::consume_fasta(
                 // Iterate through the reads and consume their k-mers.
                 try {
                     read = parser->get_next_read();
+                } catch (read_parsers::NoMoreReadsAvailable) {
+                    break;
+                }
 
-                    #pragma omp task default(none) firstprivate(read) \
-                    shared(counters, n_consumed_partial, total_reads_partial)
-                    {
-                        bool is_valid;
-                        int n, t = omp_get_thread_num();
-                        n = counters[t]->check_and_process_read(read.sequence,
-                        is_valid);
-                        n_consumed_partial[t] += n;
-                        if (is_valid) {
-                            total_reads_partial[t] += 1;
-                        }
+                #pragma omp task default(none) firstprivate(read) \
+                shared(counters, n_consumed_partial, total_reads_partial)
+                {
+                    bool is_valid;
+                    int n, t = omp_get_thread_num();
+                    n = counters[t]->check_and_process_read(read.sequence,
+                                                            is_valid);
+                    n_consumed_partial[t] += n;
+                    if (is_valid) {
+                        total_reads_partial[t] += 1;
                     }
-                } catch (read_parsers::NoMoreReadsAvailable) {
                 }
 
             } // while reads left for parser
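
The hllcounter.cc restructure hoists the NoMoreReadsAvailable catch out of
the OpenMP task body: end-of-stream now breaks the dispatch loop before a
task is ever spawned, while each spawned task still receives its own copy
of the read via firstprivate and accumulates into a per-thread slot, so no
locking is needed until the partial sums are merged. A minimal,
self-contained illustration of that accumulation pattern (not khmer code;
assumes fewer than 256 threads):

    #include <omp.h>
    #include <cstdio>

    // Each task copies its item (firstprivate) and adds into a slot owned
    // by the executing thread, so the updates never race; the slots are
    // merged once all tasks have finished at the end of the region.
    int main()
    {
        long partial[256] = {0};
        #pragma omp parallel
        #pragma omp single
        for (int item = 1; item <= 1000; ++item) {
            #pragma omp task default(none) firstprivate(item) shared(partial)
            partial[omp_get_thread_num()] += item;
        }
        long total = 0;
        for (int t = 0; t < 256; ++t) {
            total += partial[t];
        }
        std::printf("%ld\n", total);  // prints 500500
    }
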
diff --git a/lib/hllcounter.hh b/lib/hllcounter.hh
index f03cff0..0b64a31 100644
--- a/lib/hllcounter.hh
+++ b/lib/hllcounter.hh
@@ -8,11 +8,20 @@
 #ifndef HLLCOUNTER_HH
 #define HLLCOUNTER_HH
 
-#include <vector>
 #include <string>
+#include <vector>
 
+#include "khmer.hh"
 #include "read_parsers.hh"
 
+namespace khmer
+{
+namespace read_parsers
+{
+struct IParser;
+}  // namespace read_parsers
+}  // namespace khmer
+
 
 namespace khmer
 {
diff --git a/lib/ht-diff.cc b/lib/ht-diff.cc
deleted file mode 100644
index e6b9bed..0000000
--- a/lib/ht-diff.cc
+++ /dev/null
@@ -1,149 +0,0 @@
-//
-// This file is part of khmer, https://github.com/dib-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2015. It is licensed under
-// the three-clause BSD license; see LICENSE.
-// Contact: khmer-project at idyll.org
-//
-
-// Simple C++ implementation of a diff between counting hashes.
-// Author: Eric A. McDonald
-
-// You can learn which hash bins have differing values with a simple 'cmp'
-// between two hash files (if you account for file header length),
-// but this program actually loads the tables into memory
-// and checks things such as number of hash tables and hash table sizes.
-// Also, any differences in count or bigcount values are reported in
-// human-readable form.
-
-#if (__cplusplus >= 201103L)
-#   include <cstdint>
-#else
-extern "C"
-{
-#   include <stdint.h>
-}
-#endif
-#include <cstring>
-#include <cstdio>
-#include <cerrno>
-#include <cstdlib>
-
-#include <string>
-
-#include "khmer.hh"
-#include "error.hh"
-#include "counting.hh"
-
-using namespace std;
-using namespace khmer;
-
-
-static const char *	    SHORT_OPTS		= "C:R";
-
-
-int main( int argc, char * argv[ ] )
-{
-    int			rc		    = 0;
-    int			opt		    = -1;
-    char *		conv_residue	    = NULL;
-    uint32_t		max_count	    = MAX_KCOUNT;
-    bool		report_all	    = false;
-    string		ifile_name_1;
-    string		ifile_name_2;
-
-    while (-1 != (opt = getopt( argc, argv, SHORT_OPTS ))) {
-
-        switch (opt) {
-        case 'C':
-            max_count = (uint32_t)strtoul( optarg, &conv_residue, 10 );
-            if (!strcmp( optarg, conv_residue )) {
-                error( EINVAL, EINVAL, "Invalid count threshold" );
-            }
-            break;
-        case 'R':
-            report_all = true;
-            break;
-        default:
-            error( 0, 0, "Skipping unknown arg, '%c'", optopt );
-        }
-
-    }
-
-    if (optind < argc) {
-        ifile_name_1 = string( argv[ optind++ ] );
-    } else {
-        error( EINVAL, 0, "Name of first hash table file required" );
-    }
-
-    if (optind < argc) {
-        ifile_name_2 = string( argv[ optind++ ] );
-    } else {
-        error( EINVAL, 0, "Name of second hash table file required" );
-    }
-
-    CountingHash ht1( 20, 1 );
-    CountingHash ht2( 20, 1 );
-    printf( "Loading hash tables into memory....\n" );
-    ht1.load( ifile_name_1 );
-    ht2.load( ifile_name_2 );
-
-    HashIntoType i = 0, max_ht_size = 0;
-    std:: vector<HashIntoType> ht1_sizes = ht1.get_tablesizes( );
-    std:: vector<HashIntoType> ht2_sizes = ht2.get_tablesizes( );
-
-    // Compare number of tables.
-    if (ht1_sizes.size( ) != ht2_sizes.size( )) {
-        fprintf(
-            stderr, "Unequal number of hashtables (%lu and %lu).\n",
-            (unsigned long int)ht1_sizes.size( ),
-            (unsigned long int)ht2_sizes.size( )
-        );
-        exit( 1 );
-    } else
-        printf(
-            "Number of Hash Tables: %lu\n",
-            (unsigned long int)ht1_sizes.size( )
-        );
-
-    // Compare sizes of each table.
-    for (i = 0; i < ht1_sizes.size( ); ++i) {
-        if (ht1_sizes[ i ] != ht2_sizes[ i ]) {
-            fprintf(
-                stderr, "Hash table %lu has mismatched sizes of %llu and %llu.\n",
-                (unsigned long int)i, ht1_sizes[ i ], ht2_sizes[ i ]
-            );
-            exit( 1 );
-        } else {
-            printf(
-                "Size of Hash Table %lu: %llu bins\n",
-                (unsigned long int)i, ht1_sizes[ i ]
-            );
-            if (max_ht_size < ht1_sizes[ i ]) {
-                max_ht_size = ht1_sizes[ i ];
-            }
-        }
-    }
-
-    printf( "Scanning hash key space....\n" );
-    for (i = 0; i < max_ht_size; ++i) {
-        // Truncate counts at specified saturation threshold.
-        // (This accounts for the sloppy counting used for >1 threads.)
-        uint32_t count1 = MIN( ht1.get_count( i ), max_count );
-        uint32_t count2 = MIN( ht2.get_count( i ), max_count );
-        if (count1 != count2) {
-            fprintf(
-                stderr, "Hash key %llu has mismatched counts of %u and %u.\n",
-                i, ht1.get_count( i ), ht2.get_count( i )
-            );
-            if (!report_all) {
-                exit( 1 );
-            }
-        }
-    }
-    // TODO: Implement bigcount checking.
-
-    return rc;
-
-}
-
-// vim: set sts=4 sw=4 tw=80:
diff --git a/lib/khmer_exception.hh b/lib/khmer_exception.hh
index 95553df..5c524cd 100644
--- a/lib/khmer_exception.hh
+++ b/lib/khmer_exception.hh
@@ -17,7 +17,7 @@ namespace khmer
 ///
 // A base class for all exceptions.
 //
-// All exceptions should be derived from this base class.
+// All exceptions should be derived from this base class or a sub-class
 //
 class khmer_exception : public std::exception
 {
@@ -35,6 +35,9 @@ protected:
     const std::string _msg;
 };
 
+
+/////// Base Exceptions /////
+
 ///
 // A base class for file exceptions.
 //
@@ -45,15 +48,23 @@ public:
         : khmer_exception(msg) { }
 };
 
-struct InvalidStreamBuffer : public khmer_exception {
+// A base exception for value exceptions
+class khmer_value_exception : public khmer_exception
+{
+public:
+    explicit khmer_value_exception(const std::string& msg)
+        : khmer_exception(msg) { }
 };
 
-class InvalidStreamHandle : public khmer_file_exception
+/////// Specialised Exceptions /////
+
+class InvalidStream : public khmer_file_exception
 {
 public:
-    InvalidStreamHandle()
-        : khmer_file_exception("Generic InvalidStreamHandle error") {}
-    InvalidStreamHandle(const std::string& msg) : khmer_file_exception(msg) {}
+    InvalidStream()
+        : khmer_file_exception("Generic InvalidStream error") {}
+    explicit InvalidStream(const std::string& msg)
+        : khmer_file_exception(msg) {}
 };
 
 class StreamReadError : public khmer_file_exception
@@ -61,7 +72,8 @@ class StreamReadError : public khmer_file_exception
 public:
     StreamReadError()
         : khmer_file_exception("Generic StreamReadError error") {}
-    StreamReadError(const std::string& msg) : khmer_file_exception(msg) {}
+    explicit StreamReadError(const std::string& msg)
+        : khmer_file_exception(msg) {}
 };
 
 
@@ -69,11 +81,11 @@ public:
 // An exception for invalid arguments to functions
 //
 
-class InvalidValue : public khmer_exception
+class InvalidValue : public khmer_value_exception
 {
 public:
     explicit InvalidValue(const std::string& msg)
-        : khmer_exception(msg) { }
+        : khmer_value_exception(msg) { }
 };
 
 ///
@@ -87,7 +99,7 @@ public:
         : khmer_exception(msg) { }
 };
 
-}
+} // end namespace khmer
 
 #endif // KHMER_EXCEPTION_HH
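
The header now has three layers: khmer_exception at the root,
khmer_file_exception and khmer_value_exception as category bases, and
specialised types such as InvalidStream underneath. The practical payoff is
that callers (the Python bindings in _khmer.cc, for example) can translate
whole families of errors with one catch clause each; a sketch, where
do_io() and the report_* handlers are hypothetical stand-ins:

    try {
        do_io();  // placeholder for any khmer call that may throw
    } catch (khmer::khmer_file_exception &e) {
        // everything file-shaped: bad signature, bad version, EOF, ...
        report_io_error(e.what());     // hypothetical handler
    } catch (khmer::khmer_value_exception &e) {
        // everything argument-shaped: InvalidValue, InvalidRead, ...
        report_value_error(e.what());  // hypothetical handler
    }
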
 
diff --git a/lib/kmer_hash.cc b/lib/kmer_hash.cc
index 61c3741..cc7a9bd 100644
--- a/lib/kmer_hash.cc
+++ b/lib/kmer_hash.cc
@@ -5,14 +5,16 @@
 // Contact: khmer-project at idyll.org
 //
 
-#include <math.h>
-#include <string>
-#include <iostream>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
 #include <algorithm>
+#include <string>
 
+#include "MurmurHash3.h"
 #include "khmer.hh"
+#include "khmer_exception.hh"
 #include "kmer_hash.hh"
-#include "MurmurHash3.h"
 
 using namespace std;
 
diff --git a/lib/labelhash.cc b/lib/labelhash.cc
index b6366f5..4d801ec 100644
--- a/lib/labelhash.cc
+++ b/lib/labelhash.cc
@@ -5,10 +5,18 @@
 // Contact: khmer-project at idyll.org
 //
 
-#include "labelhash.hh"
-
-#include <sstream>
 #include <errno.h>
+#include <string.h>
+#include <iostream>
+#include <sstream> // IWYU pragma: keep
+#include <set>
+
+#include "hashbits.hh"
+#include "hashtable.hh"
+#include "khmer_exception.hh"
+#include "labelhash.hh"
+#include "read_parsers.hh"
+#include "subset.hh"
 
 #define IO_BUF_SIZE 250*1000*1000
 
@@ -65,7 +73,11 @@ LabelHash::consume_fasta_and_tag_with_labels(
     Label * the_label;
     // Iterate through the reads and consume their k-mers.
     while (!parser->is_complete( )) {
-        read = parser->get_next_read( );
+        try {
+            read = parser->get_next_read( );
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
 
         if (graph->check_and_normalize_read( read.sequence )) {
             // TODO: make threadsafe!
@@ -420,16 +432,19 @@ void LabelHash::load_labels_and_tags(std::string filename)
     unsigned long n_labeltags = 1;
     try {
         unsigned int save_ksize = 0;
-	char signature[4];
+        char signature[4];
         unsigned char version = 0, ht_type = 0;
 
-	infile.read(signature, 4);
+        infile.read(signature, 4);
         infile.read((char *) &version, 1);
         infile.read((char *) &ht_type, 1);
-	if (!(std::string(signature, 4) == SAVED_SIGNATURE)) {
+        if (!(std::string(signature, 4) == SAVED_SIGNATURE)) {
             std::ostringstream err;
-            err << "Incorrect file signature " << signature
-                << " while reading labels/tags from " << filename
+            err << "Incorrect file signature 0x";
+            for(size_t i=0; i < 4; ++i) {
+                err << std::hex << (int) signature[i];
+            }
+            err << " while reading labels/tags from " << filename
                 << " Should be: " << SAVED_SIGNATURE;
             throw khmer_file_exception(err.str());
         } else if (!(version == SAVED_FORMAT_VERSION)) {
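
Alongside the shared EOF and hex-signature changes, the labelhash.cc hunk
above is largely whitespace hygiene: several lines in
load_labels_and_tags() were indented with tabs and are normalised to
spaces.
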
diff --git a/lib/labelhash.hh b/lib/labelhash.hh
index d2f754f..da2ee47 100644
--- a/lib/labelhash.hh
+++ b/lib/labelhash.hh
@@ -8,15 +8,29 @@
 #ifndef LABELHASH_HH
 #define LABELHASH_HH
 
+#include <stddef.h>
+#include <stdint.h>
+#include <map>
 #include <string>
+#include <utility>
 
-#include "khmer.hh"
 #include "hashbits.hh"
 #include "hashtable.hh"
+#include "khmer.hh"
 #include "read_parsers.hh"
 
 namespace khmer
 {
+class Hashtable;
+
+namespace read_parsers
+{
+struct IParser;
+}  // namespace read_parsers
+}  // namespace khmer
+
+namespace khmer
+{
 
 class LabelHash
 {
diff --git a/lib/khmer.pc.in b/lib/oxli.pc.in
similarity index 83%
rename from lib/khmer.pc.in
rename to lib/oxli.pc.in
index 4c97777..6cf09fb 100644
--- a/lib/khmer.pc.in
+++ b/lib/oxli.pc.in
@@ -4,11 +4,11 @@ libdir=${exec_prefix}/lib
 sharedlibdir=${libdir}
 includedir=${prefix}/include
 
-Name: khmer
+Name: oxli
 Description: The unsupported core C++ library from the khmer project
 URL: http://khmer.readthedocs.org/
 Version: @VERSION@
 
 Requires:
-Libs: -L${libdir} -L${sharedlibdir} -lkhmer
+Libs: -L${libdir} -L${sharedlibdir} -loxli
 Cflags: -I${includedir}
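
With the pkg-config metadata renamed from khmer.pc to oxli.pc, downstream
builds that previously queried pkg-config for "khmer" and linked -lkhmer
now query "oxli" and link -loxli; the test-compile.cc change further below
moves the matching headers to an oxli/ include prefix.
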
diff --git a/lib/perf_metrics.cc b/lib/perf_metrics.cc
deleted file mode 100644
index a0c348a..0000000
--- a/lib/perf_metrics.cc
+++ /dev/null
@@ -1,35 +0,0 @@
-//
-// This file is part of khmer, https://github.com/dib-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2015. It is licensed under
-// the three-clause BSD license; see LICENSE.
-// Contact: khmer-project at idyll.org
-//
-
-#include "perf_metrics.hh"
-
-namespace khmer
-{
-
-#ifdef WITH_INTERNAL_METRICS
-IPerformanceMetrics::
-IPerformanceMetrics( )
-{ }
-
-
-IPerformanceMetrics::
-~IPerformanceMetrics( )
-{ }
-
-
-uint64_t const
-IPerformanceMetrics::
-_timespec_diff_in_nsecs( timespec const &start, timespec const &stop )
-{
-    return
-        ((stop.tv_sec * 1000000000U) + (uint64_t)stop.tv_nsec)
-        -   ((start.tv_sec * 1000000000U) + (uint64_t)start.tv_nsec);
-}
-#endif
-} // namespace khmer
-
-// vim: set ft=cpp sts=4 sw=4 tw=79:
diff --git a/lib/perf_metrics.hh b/lib/perf_metrics.hh
deleted file mode 100644
index 63a0e49..0000000
--- a/lib/perf_metrics.hh
+++ /dev/null
@@ -1,75 +0,0 @@
-//
-// This file is part of khmer, https://github.com/dib-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2015. It is licensed under
-// the three-clause BSD license; see LICENSE.
-// Contact: khmer-project at idyll.org
-//
-
-#ifndef PERF_METRICS_HH
-#define PERF_METRICS_HH
-
-
-#include <cstring>
-#include <ctime>
-
-
-#include "khmer.hh"
-
-namespace khmer
-{
-
-#ifdef WITH_INTERNAL_METRICS
-struct InvalidPerformanceMetricsKey : public khmer_exception {
-};
-
-
-struct IPerformanceMetrics {
-
-    IPerformanceMetrics( );
-    virtual ~IPerformanceMetrics( );
-
-    inline void	    start_timers( )
-    {
-#if defined (__linux__)
-        clock_gettime( CLOCK_REALTIME, &_temp_clock_start );
-        clock_gettime( CLOCK_THREAD_CPUTIME_ID, &_temp_cpu_start );
-// TODO: Create proper stopwatches for MacOS X.
-#else
-        memset( &_temp_clock_start, 0, sizeof( timespec ) );
-        memset( &_temp_cpu_start, 0, sizeof( timespec ) );
-#endif
-    }
-    inline void	    stop_timers( )
-    {
-#if defined (__linux__)
-        clock_gettime( CLOCK_THREAD_CPUTIME_ID, &_temp_cpu_stop );
-        clock_gettime( CLOCK_REALTIME, &_temp_clock_stop );
-// TODO: Create proper stopwatches for MacOS X.
-#else
-        memset( &_temp_cpu_stop, 0, sizeof( timespec ) );
-        memset( &_temp_clock_stop, 0, sizeof( timespec ) );
-#endif
-    }
-    virtual void    accumulate_timer_deltas( uint32_t metrics_key )	= 0;
-
-    // TODO: Add a printing or log file feature.
-
-protected:
-
-    timespec	_temp_cpu_start;
-    timespec	_temp_cpu_stop;
-    timespec	_temp_clock_start;
-    timespec	_temp_clock_stop;
-
-    uint64_t const  _timespec_diff_in_nsecs(
-        timespec const &start, timespec const &stop
-    );
-
-};
-
-#endif // WITH_INTERNAL_METRICS
-
-} // namespace khmer
-#endif // PERF_METRICS_HH
-
-// vim: set ft=cpp sts=4 sw=4 tw=79:
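
perf_metrics.cc and perf_metrics.hh are removed outright: the
HashTablePerformanceMetrics code deleted from hashtable.cc and hashtable.hh
earlier in this diff was built on these WITH_INTERNAL_METRICS interfaces,
and nothing in the remaining tree appears to reference them.
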
diff --git a/lib/read_aligner.cc b/lib/read_aligner.cc
index 569cc52..2b9e400 100644
--- a/lib/read_aligner.cc
+++ b/lib/read_aligner.cc
@@ -3,12 +3,54 @@
 // Copyright (C) Michigan State University, 2009-2015. It is licensed under
 // the three-clause BSD license; see LICENSE. Contact: ctb at msu.edu
 //
-#include "read_aligner.hh"
+#include <ctype.h>
+#include <algorithm>
+#include <limits>
+#include <map>
+#include <memory>
+#include <set>
+#include <utility>
+
+#include "hashtable.hh"
 #include "khmer_exception.hh"
+#include "read_aligner.hh"
 
 namespace khmer
 {
 
+Alignment * _empty_alignment()
+{
+  Alignment* ret = new Alignment;
+  ret->score = -std::numeric_limits<double>::infinity();
+  ret->read_alignment = "";
+  ret->graph_alignment = "";
+  ret->truncated = true;
+  return ret;
+}
+
+static Nucl _ch_to_nucl(char base)
+{
+    base = toupper(base);
+
+    Nucl e = A;
+    switch(base) {
+    case 'A':
+        e = A;
+        break;
+    case 'C':
+        e = C;
+        break;
+    case 'G':
+        e = G;
+        break;
+    case 'T':
+    case 'U':
+        e = T;
+        break;
+    }
+    return e;
+}
+
 struct del_alignment_node_t {
     del_alignment_node_t& operator()(AlignmentNode* p)
     {
@@ -222,6 +264,7 @@ void ReadAligner::Enumerate(
 
             next->score = curr->score + sc + m_sm.tsc[trans];
             next->trusted = (kmerCov >= m_trusted_cutoff);
+            next->cov = kmerCov;
             next->h_score = hcost;
             next->f_score = next->score + next->h_score;
 
@@ -347,6 +390,8 @@ Alignment* ReadAligner::ExtractAlignment(AlignmentNode* node,
     std::string read_alignment = "";
     std::string graph_alignment = "";
     std::string trusted = "";
+    std::vector<BoundedCounterType> covs;
+    size_t farthest_seq_idx = node->seq_idx;
     ret->score = node->score;
     ret->truncated = (node->seq_idx != 0)
                      && (node->seq_idx != read.length() - 1);
@@ -394,6 +439,7 @@ Alignment* ReadAligner::ExtractAlignment(AlignmentNode* node,
             graph_alignment = graph_base + graph_alignment;
             read_alignment = read_base + read_alignment;
             trusted = ((node->trusted)? "T" : "F") + trusted;
+            covs.insert(covs.begin(), node->cov);
         } else {
             graph_alignment = graph_alignment + graph_base;
             read_alignment = read_alignment + read_base;
@@ -405,6 +451,19 @@ Alignment* ReadAligner::ExtractAlignment(AlignmentNode* node,
     ret->graph_alignment = graph_alignment;
     ret->read_alignment = read_alignment;
     ret->trusted = trusted;
+    ret->covs = covs;
+
+    if(ret->truncated) {
+        std::string new_graph_alignment;
+        if (forward) {
+            new_graph_alignment = graph_alignment +
+                                  read.substr(farthest_seq_idx + 1, std::string::npos);
+        } else {
+            new_graph_alignment = read.substr(0, node->seq_idx)
+                                  + graph_alignment;
+        }
+        ret->graph_alignment = new_graph_alignment;
+    }
 
     return ret;
 
@@ -418,7 +477,7 @@ struct SearchStart {
 
 Alignment* ReadAligner::Align(const std::string& read)
 {
-    int k = m_ch->ksize();
+    WordLength k = m_ch->ksize();
     size_t num_kmers = read.length() - k + 1;
 
     SearchStart start;
@@ -436,9 +495,13 @@ Alignment* ReadAligner::Align(const std::string& read)
         }
     }
 
-    if(start.k_cov > 0) {
-        HashIntoType fhash = 0, rhash = 0;
-        _hash(start.kmer.c_str(), k, fhash, rhash);
+    if(start.k_cov == 0) {
+        return _empty_alignment();
+    }
+
+    HashIntoType fhash = 0, rhash = 0;
+    _hash(start.kmer.c_str(), k, fhash, rhash);
+
 #if READ_ALIGNER_DEBUG
         std::cerr << "Starting kmer: " << start.kmer << " "
                   << _revhash(fhash, m_ch->ksize()) << " "
@@ -447,56 +510,41 @@ Alignment* ReadAligner::Align(const std::string& read)
                   << start.kmer_idx + k - 1
                   << " emission: " << start.kmer[k - 1] << std::endl;
 #endif
-        char base = toupper(start.kmer[k - 1]);
-        Nucl e = A;
-        switch(base) {
-        case 'A':
-            e = A;
-            break;
-        case 'C':
-            e = C;
-            break;
-        case 'G':
-            e = G;
-            break;
-        case 'T':
-        case 'U':
-            e = T;
-            break;
-        }
 
-        AlignmentNode startingNode = AlignmentNode(NULL,
-                                     e, start.kmer_idx + k - 1,
-                                     MATCH, MM, fhash, rhash, k);
-        startingNode.f_score = 0;
-        startingNode.h_score = 0;
-        Alignment* forward = NULL;
-        Alignment* reverse = NULL;
-        size_t final_length = 0;
-
-        if(start.k_cov >= m_trusted_cutoff) {
-            startingNode.score = k * m_sm.trusted_match + k * m_sm.tsc[MM];
-        } else {
-            startingNode.score = k * m_sm.untrusted_match + k * m_sm.tsc[MM];
-        }
+    Nucl e = _ch_to_nucl(start.kmer[k - 1]);
+    AlignmentNode startingNode = AlignmentNode(NULL,
+                                               e, start.kmer_idx + k - 1,
+                                               MATCH, MM, fhash, rhash, k);
+    startingNode.f_score = 0;
+    startingNode.h_score = 0;
+    Alignment* forward = NULL;
+    Alignment* reverse = NULL;
+    size_t final_length = 0;
+
+    if(start.k_cov >= m_trusted_cutoff) {
+      startingNode.score = k * m_sm.trusted_match + k * m_sm.tsc[MM];
+    } else {
+      startingNode.score = k * m_sm.untrusted_match + k * m_sm.tsc[MM];
+    }
 
-        forward = Subalign(&startingNode, read.length(), true, read);
-        final_length = forward->read_alignment.length() + k;
+    forward = Subalign(&startingNode, read.length(), true, read);
+    final_length = forward->read_alignment.length() + k;
 
-        startingNode.seq_idx = start.kmer_idx;
-        reverse = Subalign(&startingNode, read.length(), false, read);
-        final_length += reverse->read_alignment.length();
+    startingNode.seq_idx = start.kmer_idx;
+    reverse = Subalign(&startingNode, read.length(), false, read);
+    final_length += reverse->read_alignment.length();
+
+    Alignment* ret = new Alignment;
 
-        Alignment* ret = new Alignment;
-        //We've actually counted the starting node score
-        //twice, so we need to adjust for that
-        ret->score = reverse->score + forward->score - startingNode.score;
-        ret->read_alignment = reverse->read_alignment +
-                              start.kmer + forward->read_alignment;
-        ret->graph_alignment = reverse->graph_alignment +
-                               start.kmer + forward->graph_alignment;
-        ret->score = ret->score -  GetNull(final_length);
-        ret->truncated = forward->truncated || reverse->truncated;
+    // We've actually counted the starting node score
+    // twice, so we need to adjust for that
+    ret->score = reverse->score + forward->score - startingNode.score;
+    ret->read_alignment = reverse->read_alignment +
+        start.kmer + forward->read_alignment;
+    ret->graph_alignment = reverse->graph_alignment +
+        start.kmer + forward->graph_alignment;
+    ret->score = ret->score - GetNull(final_length);
+    ret->truncated = forward->truncated || reverse->truncated;
 
 #if READ_ALIGNER_DEBUG
         fprintf(stderr,
@@ -509,17 +557,84 @@ Alignment* ReadAligner::Align(const std::string& read)
                 reverse->score, reverse->truncated);
 #endif
 
-        delete forward;
-        delete reverse;
-        return ret;
+    delete forward;
+    delete reverse;
+
+    return ret;
+}
+
+Alignment* ReadAligner::AlignForward(const std::string& read)
+{
+    WordLength k = m_ch->ksize();
+
+    // start with seed at position 0
+    SearchStart start;
+    start.kmer = read.substr(0, k);
+    start.kmer_idx = 0;
+    start.k_cov = m_ch->get_count(start.kmer.c_str());
+
+    if(start.k_cov == 0) {
+        return _empty_alignment();
+    }
+
+    HashIntoType fhash = 0, rhash = 0;
+    _hash(start.kmer.c_str(), k, fhash, rhash);
+
+#if READ_ALIGNER_DEBUG
+        std::cerr << "Starting kmer: " << start.kmer << " "
+                  << _revhash(fhash, m_ch->ksize()) << " "
+                  << _revhash(rhash, m_ch->ksize())
+                  << " cov: " << start.k_cov << " idx: " << start.kmer_idx << ", "
+                  << start.kmer_idx + k - 1
+                  << " emission: " << start.kmer[k - 1] << std::endl;
+#endif
+
+    Nucl e = _ch_to_nucl(start.kmer[k - 1]);
+    AlignmentNode startingNode = AlignmentNode(NULL,
+                                               e, start.kmer_idx + k - 1,
+                                               MATCH, MM, fhash, rhash, k);
+    startingNode.f_score = 0;
+    startingNode.h_score = 0;
+    Alignment* forward = NULL;
+    size_t final_length = 0;
+
+    if(start.k_cov >= m_trusted_cutoff) {
+        startingNode.score = k * m_sm.trusted_match + k * m_sm.tsc[MM];
     } else {
+        startingNode.score = k * m_sm.untrusted_match + k * m_sm.tsc[MM];
+    }
 
-        Alignment* ret = new Alignment;
-        ret->score = -std::numeric_limits<double>::infinity();
-        ret->read_alignment = "";
-        ret->graph_alignment = "";
-        ret->truncated = true;
-        return ret;
+    forward = Subalign(&startingNode, read.length(), true, read);
+    final_length = forward->read_alignment.length() + k;
+
+    Alignment* ret = new Alignment;
+
+    ret->score = forward->score;
+    ret->read_alignment = start.kmer + forward->read_alignment;
+    ret->graph_alignment = start.kmer + forward->graph_alignment;
+    ret->score = ret->score - GetNull(final_length);
+    ret->truncated = forward->truncated;
+
+    ret->covs = forward->covs;
+    ret->covs.insert(ret->covs.begin(), start.k_cov);
+    for (WordLength i = 0; i < k - 1; i++) {
+        ret->covs.push_back(0);
     }
+
+#if READ_ALIGNER_DEBUG
+        fprintf(stderr,
+                "FORWARD\n\tread_aln:%s\n\tgraph_aln:%s\n\tscore:%f\n\ttrunc:%d\n",
+                forward->read_alignment.c_str(), forward->graph_alignment.c_str(),
+                forward->score, forward->truncated);
+#endif
+
+    delete forward;
+    return ret;
 }
+
+ScoringMatrix ReadAligner::getScoringMatrix()
+{
+    return m_sm;
+}
+
 }
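
read_aligner.cc gains three things at once: the duplicated
base-to-nucleotide switch is factored into _ch_to_nucl(), the no-seed case
returns early through _empty_alignment() instead of living in a trailing
else block, and alignments now carry per-position coverage (covs) plus a
new forward-only entry point. A hypothetical usage sketch of the new API
(counting_hash, the cutoff and theta are illustrative stand-ins, not values
implied by the patch):

    // Hypothetical caller of the new forward-only aligner.
    khmer::ReadAligner aligner(counting_hash, 3 /* trusted_cutoff */,
                               1.0 /* bits_theta */);
    khmer::Alignment *aln = aligner.AlignForward(sequence);
    if (!aln->truncated) {
        // aln->covs holds the k-mer coverages seen along the alignment
    }
    delete aln;
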
diff --git a/lib/read_aligner.hh b/lib/read_aligner.hh
index 20b1654..05374b5 100644
--- a/lib/read_aligner.hh
+++ b/lib/read_aligner.hh
@@ -7,15 +7,19 @@
 #ifndef READ_ALIGNER_HH
 #define READ_ALIGNER_HH
 
-#include "khmer.hh"
-#include "counting.hh"
-
-#include <limits>
+#include <math.h>
+#include <stddef.h>
 #include <algorithm>
+#include <limits>
+#include <memory>
+#include <queue>
 #include <set>
+#include <string>
 #include <vector>
-#include <queue>
-#include <memory>
+
+#include "counting.hh"
+#include "khmer.hh"
+#include "kmer_hash.hh"
 
 #define READ_ALIGNER_DEBUG 0
 
@@ -100,6 +104,7 @@ struct AlignmentNode {
     double f_score;
     double h_score;
     bool trusted;
+    BoundedCounterType cov;
 
     size_t num_indels;
 
@@ -111,7 +116,7 @@ struct AlignmentNode {
         :prev(_prev), base(_emission), seq_idx(_seq_idx),
          state(_state), trans(_trans), fwd_hash(_fwd_hash),
          rc_hash(_rc_hash), score(0), f_score(0), h_score(0), trusted(false),
-         num_indels(0), length(_length) {}
+         cov(0), num_indels(0), length(_length) {}
 
     bool operator== (const AlignmentNode& rhs) const
     {
@@ -164,6 +169,7 @@ struct Alignment {
     std::string graph_alignment;
     std::string read_alignment;
     std::string trusted;
+    std::vector<BoundedCounterType> covs;
     double score;
     bool truncated;
 };
@@ -203,9 +209,9 @@ private:
         }
         return ret;
     }
-
 public:
     Alignment* Align(const std::string&);
+    Alignment* AlignForward(const std::string&);
 
     ReadAligner(khmer::CountingHash* ch,
                 BoundedCounterType trusted_cutoff, double bits_theta)
@@ -227,7 +233,20 @@ public:
                   << std::endl;
 #endif
     }
+
+    ReadAligner(khmer::CountingHash* ch,
+                BoundedCounterType trusted_cutoff, double bits_theta,
+                double* scoring_matrix, double* transitions)
+        : bitmask(comp_bitmask(ch->ksize())),
+          rc_left_shift(ch->ksize() * 2 - 2),
+          m_ch(ch), m_sm(scoring_matrix[0], scoring_matrix[1],
+                         scoring_matrix[2], scoring_matrix[3],
+                         transitions),
+          m_trusted_cutoff(trusted_cutoff),
+          m_bits_theta(bits_theta) {};
+
+    ScoringMatrix getScoringMatrix();
+
 };
 }
-
 #endif // READ_ALIGNER_HH
diff --git a/lib/read_parsers.cc b/lib/read_parsers.cc
index 1e8506c..8a9dc14 100644
--- a/lib/read_parsers.cc
+++ b/lib/read_parsers.cc
@@ -5,14 +5,13 @@
 // Contact: khmer-project at idyll.org
 //
 
-#include "read_parsers.hh"
+#include <seqan/seq_io.h> // IWYU pragma: keep
+#include <seqan/sequence.h> // IWYU pragma: keep
+#include <seqan/stream.h> // IWYU pragma: keep
+#include <fstream>
 
-#include <cstring>
 #include "khmer_exception.hh"
-#include <seqan/sequence.h>
-#include <seqan/seq_io.h>
-#include <seqan/stream.h>
-#include <pthread.h>
+#include "read_parsers.hh"
 
 namespace khmer
 {
@@ -33,11 +32,11 @@ SeqAnParser::SeqAnParser( char const * filename ) : IParser( )
     if (!seqan::isGood(_private->stream)) {
         std::string message = "Could not open ";
         message = message + filename + " for reading.";
-        throw InvalidStreamHandle(message.c_str());
+        throw InvalidStream(message);
     } else if (seqan::atEnd(_private->stream)) {
         std::string message = "File ";
         message = message + filename + " does not contain any sequences!";
-        throw InvalidStreamHandle(message.c_str());
+        throw InvalidStream(message);
     }
     __asm__ __volatile__ ("" ::: "memory");
     _private->seqan_spin_lock = 0;
@@ -122,7 +121,7 @@ IParser(
             REG_EXTENDED | REG_NOSUB
         );
     if (regex_rc) {
-        throw khmer_exception();
+        throw khmer_exception("Could not compile R2 nosub regex");
     }
     regex_rc =
         regcomp(
@@ -130,7 +129,7 @@ IParser(
             "^.+(/1| 1:[YN]:[[:digit:]]+:[[:alpha:]]+).{0}", REG_EXTENDED
         );
     if (regex_rc) {
-        throw khmer_exception();
+        throw khmer_exception("Could not compile R1 regex");
     }
     regex_rc =
         regcomp(
@@ -138,7 +137,7 @@ IParser(
             "^.+(/2| 2:[YN]:[[:digit:]]+:[[:alpha:]]+).{0}", REG_EXTENDED
         );
     if (regex_rc) {
-        throw khmer_exception();
+        throw khmer_exception("Could not compile R2 regex");
     }
     _num_reads = 0;
     _have_qualities = false;
@@ -169,7 +168,9 @@ imprint_next_read_pair( ReadPair &the_read_pair, uint8_t mode )
         _imprint_next_read_pair_in_error_mode( the_read_pair );
         break;
     default:
-        throw UnknownPairReadingMode( );
+        std::ostringstream oss;
+        oss << "Unknown pair reading mode: " << mode;
+        throw UnknownPairReadingMode(oss.str());
     }
 }
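
The read_parsers.cc changes are all about diagnosability: InvalidStreamHandle
becomes InvalidStream and takes its message as a std::string (no more
.c_str() round-trip), each of the three regcomp() calls now reports which
regex failed to compile rather than throwing a bare khmer_exception(), and
an unknown pair-reading mode reports the offending mode value. The include
block is also reordered into the IWYU style used across this commit.
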
 
diff --git a/lib/read_parsers.hh b/lib/read_parsers.hh
index b8ceadf..13588b0 100644
--- a/lib/read_parsers.hh
+++ b/lib/read_parsers.hh
@@ -9,9 +9,15 @@
 #define READ_PARSERS_HH
 
 #include <regex.h>
-#include <iostream>
+#include <stddef.h>
+#include <stdint.h>
 #include <cstdlib>
+#include <iostream>
+#include <string>
+#include <utility>
+
 #include "khmer.hh"
+#include "khmer_exception.hh"
 
 namespace khmer
 {
@@ -21,32 +27,32 @@ namespace khmer
 namespace read_parsers
 {
 
-struct NoMoreReadsAvailable : public  khmer_exception {
-    explicit NoMoreReadsAvailable(const char *msg) :
-        khmer_exception(msg) {}
+struct NoMoreReadsAvailable : public  khmer_file_exception {
+    explicit NoMoreReadsAvailable(const std::string& msg) :
+        khmer_file_exception(msg) {}
     NoMoreReadsAvailable() :
-        khmer_exception("No more reads available in this stream.") {}
+        khmer_file_exception("No more reads available in this stream.") {}
 };
 
-struct InvalidRead : public  khmer_exception {
-    explicit InvalidRead(const char *msg) :
-        khmer_exception(msg) {}
+struct InvalidRead : public  khmer_value_exception {
+    explicit InvalidRead(const std::string& msg) :
+        khmer_value_exception(msg) {}
     InvalidRead() :
-        khmer_exception("Invalid read") {}
+        khmer_value_exception("Invalid FASTA/Q read") {}
 };
 
-struct UnknownPairReadingMode : public  khmer_exception {
-    explicit UnknownPairReadingMode(const char *msg) :
-        khmer_exception(msg) {}
+struct UnknownPairReadingMode : public  khmer_value_exception {
+    explicit UnknownPairReadingMode(const std::string& msg) :
+        khmer_value_exception(msg) {}
     UnknownPairReadingMode() :
-        khmer_exception("Unknown pair reading mode supplied.") {}
+        khmer_value_exception("Unknown pair reading mode supplied.") {}
 };
 
-struct InvalidReadPair : public  khmer_exception {
-    explicit InvalidReadPair(const char *msg) :
-        khmer_exception(msg) {}
+struct InvalidReadPair : public  khmer_value_exception {
+    explicit InvalidReadPair(const std::string& msg) :
+        khmer_value_exception(msg) {}
     InvalidReadPair() :
-        khmer_exception("Invalid read pair detected.") {}
+        khmer_value_exception("Invalid read pair detected.") {}
 };
 
 struct Read {
@@ -138,7 +144,7 @@ class SeqAnParser : public IParser
 {
 
 public:
-    SeqAnParser( const char * filename );
+    explicit SeqAnParser( const char * filename );
     ~SeqAnParser( );
 
     bool is_complete( );
@@ -146,6 +152,7 @@ public:
 
 private:
     struct Handle;
+
     Handle* _private;
 
 };
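
Re-basing the parser exceptions matters for catch ordering:
NoMoreReadsAvailable is now a khmer_file_exception, so a generic file-error
handler will swallow it unless the end-of-stream case is caught first. A
sketch:

    try {
        read = parser->get_next_read();
    } catch (khmer::read_parsers::NoMoreReadsAvailable &) {
        // expected end of stream: leave the read loop
    } catch (khmer::khmer_file_exception &e) {
        // a genuine I/O failure: surface e.what() to the caller
    }
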
diff --git a/lib/subset.cc b/lib/subset.cc
index c62ed39..d5b490b 100644
--- a/lib/subset.cc
+++ b/lib/subset.cc
@@ -5,13 +5,21 @@
 // Contact: khmer-project at idyll.org
 //
 
-#include "hashbits.hh"
-#include "subset.hh"
-#include "read_parsers.hh"
-
-#include <sstream>
-#include <errno.h>
 #include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <iostream>
+#include <sstream> // IWYU pragma: keep
+#include <map>
+#include <set>
+#include <utility>
+
+#include "counting.hh"
+#include "hashtable.hh"
+#include "khmer_exception.hh"
+#include "kmer_hash.hh"
+#include "read_parsers.hh"
+#include "subset.hh"
 
 #define IO_BUF_SIZE 250*1000*1000
 #define BIG_TRAVERSALS_ARE 200
@@ -99,7 +107,12 @@ size_t SubsetPartition::output_partitioned_file(
     //
 
     while(!parser->is_complete()) {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
+
         seq = read.sequence;
 
         if (_ht->check_and_normalize_read(seq)) {
@@ -205,7 +218,11 @@ unsigned int SubsetPartition::find_unpart(
     //
 
     while(!parser->is_complete()) {
-        read = parser->get_next_read();
+        try {
+            read = parser->get_next_read();
+        } catch (NoMoreReadsAvailable &exc) {
+            break;
+        }
         seq = read.sequence;
 
         if (_ht->check_and_normalize_read(seq)) {
@@ -1283,8 +1300,11 @@ void SubsetPartition::merge_from_disk(string other_filename)
         infile.read((char *) &ht_type, 1);
         if (!(std::string(signature, 4) == SAVED_SIGNATURE)) {
             std::ostringstream err;
-            err << "Incorrect file signature " << signature
-                << " while reading subset pmap from " << other_filename
+            err << "Incorrect file signature 0x";
+            for(size_t i=0; i < 4; ++i) {
+                err << std::hex << (int) signature[i];
+            }
+            err << " while reading subset pmap from " << other_filename
                 << " Should be: " << SAVED_SIGNATURE;
             throw khmer_file_exception(err.str());
         } else if (!(version == SAVED_FORMAT_VERSION)) {
diff --git a/lib/subset.hh b/lib/subset.hh
index a7d053e..67a5ba7 100644
--- a/lib/subset.hh
+++ b/lib/subset.hh
@@ -8,19 +8,23 @@
 #ifndef SUBSET_HH
 #define SUBSET_HH
 
+#include <stddef.h>
+#include <queue>
+#include <string>
+
 #include "khmer.hh"
 
 namespace khmer
 {
 class CountingHash;
-class Hashtable;
 class Hashbits;
+class Hashtable;
 
 struct pre_partition_info {
     HashIntoType kmer;
     SeenSet tagged_kmers;
 
-    pre_partition_info(HashIntoType _kmer) : kmer(_kmer) {};
+    explicit pre_partition_info(HashIntoType _kmer) : kmer(_kmer) {};
 };
 
 class SubsetPartition
@@ -40,7 +44,7 @@ protected:
                                            const HashIntoType kmer);
 
 public:
-    SubsetPartition(Hashtable * ht) : next_partition_id(2), _ht(ht)
+    explicit SubsetPartition(Hashtable * ht) : next_partition_id(2), _ht(ht)
     {
         ;
     };
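
subset.hh picks up the same hygiene applied across the headers in this
commit: single-argument constructors become explicit, so a HashIntoType or
Hashtable* can no longer convert silently into a pre_partition_info or
SubsetPartition. A small hypothetical illustration of what explicit forbids:

    struct Wrapper {
        explicit Wrapper(unsigned long v) : value(v) {}
        unsigned long value;
    };

    void take(const Wrapper &w);

    // take(42);          // error: no implicit conversion any more
    // take(Wrapper(42)); // fine: the conversion is spelled out
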
diff --git a/lib/test-HashTables.cc b/lib/test-HashTables.cc
deleted file mode 100644
index baa0fd2..0000000
--- a/lib/test-HashTables.cc
+++ /dev/null
@@ -1,150 +0,0 @@
-//
-// This file is part of khmer, https://github.com/dib-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2015. It is licensed under
-// the three-clause BSD license; see LICENSE.
-// Contact: khmer-project at idyll.org
-//
-
-// Simple C++ implementation of the 'load-graph' Python script.
-// Author: Eric A. McDonald
-
-
-#include <cstring>
-#include <cstdio>
-#include <cerrno>
-#include <cstdlib>
-#include <unistd.h>
-#include <getopt.h>
-
-#include <omp.h>
-
-#define HASH_TYPE_TO_TEST   1 // Counting Hash
-//#define HASH_TYPE_TO_TEST   2 // Bit Hash
-
-// #define OUTPUT_HASHTABLE
-
-
-#include "error.hh"
-#include "read_parsers.hh"
-#if HASH_TYPE_TO_TEST == 1
-#  include "counting.hh"
-#elif HASH_TYPE_TO_TEST == 2
-#  include "hashbits.hh"
-#else
-#  error "No HASH_TYPE_TO_TEST macro defined."
-#endif
-#include "primes.hh"
-
-using namespace std;
-using namespace khmer;
-using namespace khmer:: read_parsers;
-
-
-static const char *	    SHORT_OPTS		= "k:N:x:s:";
-
-
-int main( int argc, char * argv[ ] )
-{
-    unsigned long	kmer_length	    = 32;
-    float		ht_size_FP	    = 1.0E6;
-    unsigned long	ht_count	    = 4;
-    uint64_t		cache_size	    = 4L * 1024 * 1024 * 1024;
-
-    int			rc		    = 0;
-    int			opt		    = -1;
-    char *		conv_residue	    = NULL;
-    string		ofile_name;
-    string		ifile_name;
-    // FILE *		ofile		    = NULL;
-
-    while (-1 != (opt = getopt( argc, argv, SHORT_OPTS ))) {
-
-        switch (opt) {
-
-        case 'k':
-            kmer_length = strtoul( optarg, &conv_residue, 10 );
-            if (!strcmp( optarg, conv_residue )) {
-                error( EINVAL, EINVAL, "Invalid kmer length" );
-            }
-            break;
-
-        case 'N':
-            ht_count = strtoul( optarg, &conv_residue, 10 );
-            if (!strcmp( optarg, conv_residue )) {
-                error( EINVAL, EINVAL, "Invalid number of hashtables" );
-            }
-            break;
-
-        case 'x':
-            ht_size_FP = strtof( optarg, &conv_residue );
-            if (!strcmp( optarg, conv_residue )) {
-                error( EINVAL, EINVAL, "Invalid hashtable size" );
-            }
-            break;
-
-        case 's':
-            cache_size = strtoull( optarg, &conv_residue, 10 );
-            if (!strcmp( optarg, conv_residue )) {
-                error( EINVAL, EINVAL, "Invalid cache size" );
-            }
-            break;
-
-        default:
-            error( 0, 0, "Skipping unknown arg, '%c'", optopt );
-        }
-
-    }
-
-    if (optind < argc) {
-        ofile_name = string( argv[ optind++ ] );
-    } else {
-        error( EINVAL, 0, "Output file name required" );
-    }
-
-    if (optind < argc) {
-        ifile_name = string( argv[ optind++ ] );
-    } else {
-        error( EINVAL, 0, "Input file name required" );
-    }
-
-    HashIntoType	    ht_size		= (HashIntoType)ht_size_FP;
-    Primes primetab( ht_size );
-    vector<HashIntoType> ht_sizes;
-    for ( unsigned int i = 0; i < ht_count; ++i ) {
-        ht_sizes.push_back( primetab.get_next_prime( ) );
-    }
-
-    unsigned int	    reads_total		= 0;
-    unsigned long long int  n_consumed		= 0;
-
-    Config		    &the_config		= get_active_config( );
-    the_config.set_number_of_threads( omp_get_max_threads( ) );
-
-#if HASH_TYPE_TO_TEST == 1
-    CountingHash ht( kmer_length, ht_sizes );
-    IParser * parser = IParser:: get_parser(
-                           ifile_name, the_config.get_number_of_threads( ), cache_size
-                       );
-    #pragma omp parallel shared( reads_total, n_consumed )
-    {
-        ht.consume_fasta( parser, reads_total, n_consumed );
-    }
-#elif HASH_TYPE_TO_TEST == 2
-    Hashbits ht( kmer_length, ht_sizes );
-    ht.consume_fasta_and_tag( ifile_name, reads_total, n_consumed );
-#endif
-
-#ifdef OUTPUT_HASHTABLE
-#if	HASH_TYPE_TO_TEST == 1
-    ht.save( ofile_name + ".ht_count" );
-#elif	HASH_TYPE_TO_TEST == 2
-    ht.save( ofile_name + ".ht_bits" );
-    ht.save_tagset( ofile_name + ".tagset" );
-#endif
-#endif
-
-    return rc;
-}
-
-
-// vim: set sts=4 sw=4 tw=80:
diff --git a/lib/test-Parser.cc b/lib/test-Parser.cc
deleted file mode 100644
index 1424cef..0000000
--- a/lib/test-Parser.cc
+++ /dev/null
@@ -1,145 +0,0 @@
-//
-// This file is part of khmer, https://github.com/dib-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2015. It is licensed under
-// the three-clause BSD license; see LICENSE.
-// Contact: khmer-project at idyll.org
-//
-
-// Test driver for the Parser class.
-// Author: Eric McDonald
-
-
-#include <cerrno>
-#include <cstring>
-#include <cstdio>
-#include <cstdlib>
-#include <fcntl.h>
-#include <getopt.h>
-
-#include <omp.h>
-
-#include "error.hh"
-#include "read_parsers.hh"
-
-
-// #define WRITE_SUMMARY
-
-
-using namespace khmer;
-using namespace khmer:: read_parsers;
-
-
-// s: Cache Size
-static char const *	    SHORT_OPTS	    = "s:";
-
-
-int main( int argc, char * argv[ ] )
-{
-    int		    rc		    = 0;
-    Config	    &the_config	    = get_active_config( );
-    uint64_t	    cache_size	    =
-        the_config.get_reads_input_buffer_size( );
-    char *	    ifile_name	    = NULL;
-
-    int		    opt		    = -1;
-    char *	    conv_residue    = NULL;
-    while (-1 != (opt = getopt( argc, argv, SHORT_OPTS ))) {
-
-        switch (opt) {
-
-        case 's':
-            cache_size = strtoull( optarg, &conv_residue, 10 );
-            if (!strcmp( optarg, conv_residue )) {
-                error( EINVAL, EINVAL, "Invalid cache size" );
-            }
-            break;
-
-        default:
-            error( 0, 0, "Skipping unknown arg, '%c'", optopt );
-
-        } // option switch
-
-    } // getopt loop
-
-    if (optind < argc) {
-        ifile_name = argv[ optind++ ];
-    } else {
-        error( EINVAL, 0, "Input file name required" );
-    }
-    std:: string    ifile_name_STRING( ifile_name );
-
-    the_config.set_input_buffer_trace_level( TraceLogger:: TLVL_ALL );
-    uint32_t	    number_of_threads	    = omp_get_max_threads( );
-    IParser *	    parser		    = IParser:: get_parser(
-                                          ifile_name_STRING, number_of_threads, cache_size,
-                                          TraceLogger:: TLVL_DEBUG6
-                                      );
-
-    #pragma omp parallel default( shared )
-    {
-        uint32_t	thread_id	    = (uint32_t)omp_get_thread_num( );
-        Read		the_read;
-        uint64_t	seq_len;
-        char		ofile_name[ FILENAME_MAX + 1 ];
-        FILE *		ofile_handle	    = NULL;
-
-#ifdef WRITE_SUMMARY
-        ofile_name[ FILENAME_MAX ] = '\0';
-#endif
-
-        fprintf(
-            stderr,
-            "OMP thread %lu reporting for duty.\n",
-            (unsigned long int)thread_id
-        );
-
-#ifdef WRITE_SUMMARY
-        snprintf(
-            ofile_name, FILENAME_MAX, "summary-%lu.log",
-            (unsigned long int)thread_id
-        );
-        ofile_handle = fopen( ofile_name, "w" );
-        if (NULL == ofile_handle)
-            // TODO: Report an error.
-            ;
-#endif
-
-        for (uint64_t readnum = 0; !parser->is_complete( ); ++readnum) {
-
-            if (0 == readnum % 100000)
-                fprintf(
-                    stderr,
-                    "OMP thread %lu is on read number %llu.\n",
-                    (unsigned long int)thread_id,
-                    (unsigned long long int)readnum
-                );
-
-            the_read = parser->get_next_read( );
-
-#if (1)
-            printf(
-                "@%s\n%s\n+\n%s\n",
-                the_read.name.c_str( ),
-                the_read.sequence.c_str( ),
-                the_read.accuracy.c_str( )
-            );
-#endif
-
-#ifdef WRITE_SUMMARY
-            fflush( ofile_handle );
-#endif
-
-        }
-
-#ifdef WRITE_SUMMARY
-        fclose( ofile_handle );
-#endif
-
-    } // parallel region
-
-    delete parser;
-    return rc;
-}
-
-
-// vim: set ft=cpp sts=4 sw=4 tw=80:
diff --git a/lib/test-compile.cc b/lib/test-compile.cc
index f0c4bb0..285801f 100644
--- a/lib/test-compile.cc
+++ b/lib/test-compile.cc
@@ -6,9 +6,10 @@
 //
 
 // Author:  Kevin Murray, spam at kdmurray.id.au
-// This file is used to test compilation with libkhmer.a/libkhmer.so
+// This file is used to test compilation with libkhmer.a/libkhmer.so, after
+// installation
 
-#include  <counting.hh>
+#include <oxli/counting.hh>
 
 int main()
 {
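(A note on what this now exercises: with the header moved under ``oxli/``, the
file compiles against the installed headers and library rather than the source
tree. Assuming the renamed ``oxli`` pkg-config file is installed, something
like ``c++ lib/test-compile.cc $(pkg-config --cflags --libs oxli)`` should
build it; the pkg-config name is an assumption based on the ``oxli.pc.in``
rename, not something stated in this hunk.)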
diff --git a/lib/trace_logger.cc b/lib/trace_logger.cc
deleted file mode 100644
index 9600d99..0000000
--- a/lib/trace_logger.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-//
-// This file is part of khmer, https://github.com/dib-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2015. It is licensed under
-// the three-clause BSD license; see LICENSE.
-// Contact: khmer-project at idyll.org
-//
-
-#include <fcntl.h>
-
-#include "trace_logger.hh"
-#include "khmer_exception.hh"
-
-namespace khmer
-{
-
-
-#ifdef WITH_INTERNAL_TRACING
-TraceLogger::
-TraceLogger( uint8_t const level, FILE * stream_handle )
-    : _level( level ), _shared_stream( true ), _stream_handle( stream_handle )
-{
-    if( !(NULL != stream_handle) ) {
-        throw khmer_exception();
-    }
-}
-#endif
-
-
-TraceLogger::
-TraceLogger( uint8_t const level, char const * const file_name_format, ... )
-#ifdef WITH_INTERNAL_TRACING
-    : _level( level ), _shared_stream( false )
-{
-    char	tfile_name[ FILENAME_MAX + 1 ];
-    va_list	varargs;
-
-    va_start( varargs, file_name_format );
-    vsnprintf( tfile_name, FILENAME_MAX, file_name_format, varargs );
-    va_end( varargs );
-
-    _stream_handle = fopen( tfile_name, "w" );
-    if (NULL == _stream_handle) {
-        throw InvalidStreamBuffer( );
-    }
-
-}
-#else	// WITH_INTERNAL_TRACING
-{ }
-#endif	// !WITH_INTERNAL_TRACING
-
-
-TraceLogger::
-~TraceLogger( )
-#ifdef WITH_INTERNAL_TRACING
-{
-
-    if ((!_shared_stream) && (NULL != _stream_handle)) {
-        fclose( _stream_handle );
-        _stream_handle = NULL;
-    }
-
-}
-#else	// WITH_INTERNAL_TRACING
-{ }
-#endif	// !WITH_INTERNAL_TRACING
-
-} // namespace khmer
-
-
-// vim: set ft=cpp sts=4 sw=4 tw=80:
diff --git a/lib/trace_logger.hh b/lib/trace_logger.hh
deleted file mode 100644
index 9c24161..0000000
--- a/lib/trace_logger.hh
+++ /dev/null
@@ -1,76 +0,0 @@
-//
-// This file is part of khmer, https://github.com/dib-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2015. It is licensed under
-// the three-clause BSD license; see LICENSE.
-// Contact: khmer-project at idyll.org
-//
-
-#ifndef TRACE_LOGGER_HH
-#define TRACE_LOGGER_HH
-
-#include <cstdarg>
-#include <cstdio>
-
-#include "khmer.hh"
-
-
-namespace khmer
-{
-
-
-struct TraceLogger {
-
-    enum {
-        TLVL_ALL	= 0,
-        TLVL_DEBUG9, TLVL_DEBUG8, TLVL_DEBUG7, TLVL_DEBUG6, TLVL_DEBUG5,
-        TLVL_DEBUG4, TLVL_DEBUG3, TLVL_DEBUG2, TLVL_DEBUG1, TLVL_DEBUG0,
-        TLVL_INFO9, TLVL_INFO8, TLVL_INFO7, TLVL_INFO6, TLVL_INFO5,
-        TLVL_INFO4, TLVL_INFO3, TLVL_INFO2, TLVL_INFO1, TLVL_INFO0,
-        TLVL_WARNING	= 30,
-        TLVL_ERROR	= 40,
-        TLVL_CRITICAL	= 50,
-        TLVL_NONE	= 255
-    };
-#ifdef WITH_INTERNAL_TRACING
-    TraceLogger( uint8_t const level, FILE * stream_handle );
-#endif
-    TraceLogger(
-        uint8_t const level, char const * const file_name_format, ...
-    );
-    ~TraceLogger( );
-
-    inline void	    operator( )(
-        uint8_t const level, char const * const format, ...
-    ) const
-#ifdef WITH_INTERNAL_TRACING
-    {
-        va_list varargs;
-
-        if (_level <= level) {
-            va_start( varargs, format );
-            vfprintf( _stream_handle, format, varargs );
-            va_end( varargs );
-            fflush( _stream_handle );
-        }
-
-    }
-#else	// WITH_INTERNAL_TRACING
-    { }
-#endif	// !WITH_INTERNAL_TRACING
-
-private:
-#ifdef WITH_INTERNAL_TRACING
-    bool	    _shared_stream;
-    uint8_t	    _level;
-    FILE *	    _stream_handle;
-#endif
-};
-
-
-
-} // namespace khmer
-
-
-#endif // TRACE_LOGGER_HH
-
-// vim: set ft=cpp sts=4 sw=4 tw=80:
diff --git a/oxli/__init__.py b/oxli/__init__.py
index 742c4fc..53e60ee 100755
--- a/oxli/__init__.py
+++ b/oxli/__init__.py
@@ -11,6 +11,7 @@ Single entry point script for khmer
 """
 
 import argparse
+import sys
 import textwrap
 from khmer import khmer_args
 from oxli import build_graph
@@ -48,7 +49,10 @@ def main():
     """
     main function; does the parsing and kicks off the subcommand
     """
-    args = get_parser().parse_args()
+    if len(sys.argv) < 2:
+        args = get_parser().parse_args(['--help'])
+    else:
+        args = get_parser().parse_args()
     args.func(args)
 
 if __name__ == '__main__':
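One consequence of the zero-argument fallback above: argparse treats
``--help`` like any other parsed flag, printing the top-level usage and
raising ``SystemExit``, so ``args.func(args)`` is never reached when ``oxli``
is invoked bare. A minimal standalone sketch of that argparse behavior
(parser contents hypothetical, not the real oxli subcommands)::

    import argparse

    parser = argparse.ArgumentParser(prog='oxli')
    parser.add_argument('--dummy')         # stand-in for the real subcommands

    try:
        parser.parse_args(['--help'])      # prints the usage text...
    except SystemExit as err:
        print('exit status:', err.code)    # ...then exits, normally with 0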
diff --git a/oxli/build_graph.py b/oxli/build_graph.py
index 43b5a33..adf4de6 100644
--- a/oxli/build_graph.py
+++ b/oxli/build_graph.py
@@ -5,7 +5,7 @@
 # the three-clause BSD license; see doc/LICENSE.txt.
 # Contact: khmer-project at idyll.org
 #
-# pylint: disable=invalid-name,missing-docstring
+# pylint: disable=missing-docstring
 """
 Build a graph from the given sequences, save in <ptname>.
 
@@ -20,7 +20,8 @@ import sys
 
 import khmer
 from khmer import khmer_args
-from khmer.khmer_args import (report_on_config, info, add_threading_args)
+from khmer.khmer_args import (report_on_config, info, add_threading_args,
+                              calculate_tablesize)
 from khmer.kfile import check_input_files, check_space
 from khmer.kfile import check_space_for_hashtable
 from oxli import functions
@@ -51,8 +52,11 @@ def main(args):
     for fname in args.input_filenames:
         check_input_files(fname, args.force)
 
-    check_space(args.input_filenames, args.force)
-    check_space_for_hashtable(args, 'nodegraph', args.force)
+    # if optimization args are given, do optimization
+    args = functions.do_sanity_checking(args, 0.01)
+
+    tablesize = calculate_tablesize(args, 'nodegraph')
+    check_space_for_hashtable(args.output_filename, tablesize, args.force)
 
     print('Saving k-mer presence table to %s' % base, file=sys.stderr)
     print('Loading kmers from sequences in %s' %
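Read together, the replaced pre-flight checks form a three-step sequence:
reconcile any sizing restrictions, turn them into a concrete table size, and
verify disk space. A sketch of the combined flow, using only the calls
introduced above (``args`` as produced by this script's parser)::

    from oxli import functions
    from khmer.khmer_args import calculate_tablesize
    from khmer.kfile import check_space_for_hashtable

    def preflight(args):
        # reconcile any memory / fp-rate restrictions against a 1% FP ceiling
        args = functions.do_sanity_checking(args, 0.01)
        # translate the (possibly updated) args into a concrete table size
        tablesize = calculate_tablesize(args, 'nodegraph')
        # refuse to start if the target filesystem cannot hold the table
        check_space_for_hashtable(args.output_filename, tablesize, args.force)
        return args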
diff --git a/oxli/functions.py b/oxli/functions.py
index e429efd..5b72be4 100644
--- a/oxli/functions.py
+++ b/oxli/functions.py
@@ -1,3 +1,7 @@
+"""
+A collection of functions for use throughout khmer/oxli
+"""
+
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2015. It is licensed under
@@ -6,51 +10,72 @@
 #
 
 
+from __future__ import print_function
 from collections import namedtuple
 import threading
 import math
 import khmer.utils
+import sys
+
+
+def optimal_size(num_kmers, mem_cap=None, fp_rate=None):
+    """
+    Utility function for estimating optimal counting table args where:
+      - num_kmers: number of unique kmers [required]
+      - mem_cap: the allotted amount of memory [optional, conflicts with fp_rate]
+      - fp_rate: the desired false positive rate [optional, conflicts with mem_cap]
+    """
+    if all((num_kmers is not None, mem_cap is not None, fp_rate is None)):
+        return estimate_optimal_with_K_and_M(num_kmers, mem_cap)
+    elif all((num_kmers is not None, mem_cap is None, fp_rate is not None)):
+        return estimate_optimal_with_K_and_f(num_kmers, fp_rate)
+    else:
+        raise TypeError("num_kmers and either mem_cap or fp_rate"
+                        " must be defined.")
 
 
-def estimate_optimal_with_N_and_M(N, M):
+def estimate_optimal_with_K_and_M(num_kmers, mem_cap):
     """
-    Utility function for estimating optimal counting table args where N is the
-    number of unique kmer and M is the allotted amount of memory
+    Utility function for estimating optimal counting table args where num_kmers
+    is the number of unique k-mers and mem_cap is the allotted amount of memory
     """
-    Z = math.log(2)*(M/float(N))
-    intZ = int(Z)
-    if intZ == 0:
-        intZ = 1
-    H = int(M/intZ)
-    M = H*intZ
-    f2 = (1-math.exp(-N/float(H)))**intZ
+
+    n_tables = math.log(2) * (mem_cap / float(num_kmers))
+    int_n_tables = int(n_tables)
+    if int_n_tables == 0:
+        int_n_tables = 1
+    ht_size = int(mem_cap / int_n_tables)
+    mem_cap = ht_size * int_n_tables
+    fp_rate = (1 - math.exp(-num_kmers / float(ht_size))) ** int_n_tables
     res = namedtuple("result", ["num_htables", "htable_size", "mem_use",
                                 "fp_rate"])
-    return res(intZ, H, M, f2)
+    return res(int_n_tables, ht_size, mem_cap, fp_rate)
 
 
-def estimate_optimal_with_N_and_f(N, f):
+def estimate_optimal_with_K_and_f(num_kmers, des_fp_rate):
     """
-    Utility function for estimating optimal memory where N is the number of
-    unique kmers and f is the desired false positive rate
+    Utility function for estimating optimal memory where num_kmers is the
+    number of unique kmers and des_fp_rate is the desired false positive rate
     """
-    Z = math.log(f, 0.5)
-    intZ = int(Z)
-    if intZ == 0:
-        intZ = 1
+    n_tables = math.log(des_fp_rate, 0.5)
+    int_n_tables = int(n_tables)
+    if int_n_tables == 0:
+        int_n_tables = 1
 
-    H1 = int(-N/(math.log(1-f**(1/float(intZ)))))
-    M1 = H1 * intZ
-    f1 = (1-math.exp(-N/float(H1)))**intZ
+    ht_size = int(-num_kmers / (
+        math.log(1 - des_fp_rate ** (1 / float(int_n_tables)))))
+    mem_cap = ht_size * int_n_tables
+    fp_rate = (1 - math.exp(-num_kmers / float(ht_size))) ** int_n_tables
 
     res = namedtuple("result", ["num_htables", "htable_size", "mem_use",
                                 "fp_rate"])
-    return res(intZ, H1, M1, f1)
+    return res(int_n_tables, ht_size, mem_cap, fp_rate)
 
 
 def optimal_args_output_gen(unique_kmers, fp_rate):
     """
     Assembles output string for optimal arg sandbox scripts
+    Takes in unique_kmers and the desired fp_rate.
     """
     to_print = []
 
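To make the sizing arithmetic in ``estimate_optimal_with_K_and_M`` concrete,
here is that branch evaluated by hand for hypothetical inputs (one billion
unique k-mers against a 4 GB budget; numbers are illustrative only)::

    import math

    num_kmers, mem_cap = int(1e9), int(4e9)               # hypothetical inputs
    n_tables = max(1, int(math.log(2) * (mem_cap / float(num_kmers))))   # 2
    ht_size = int(mem_cap / n_tables)                     # 2e9 slots per table
    fp_rate = (1 - math.exp(-num_kmers / float(ht_size))) ** n_tables
    print(n_tables, ht_size, round(fp_rate, 3))           # 2 2000000000 0.155

The same identities run in reverse in ``estimate_optimal_with_K_and_f``: fix
the false positive rate, derive the table count from a log base 0.5, then
solve for the table size.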
@@ -63,8 +88,12 @@ def optimal_args_output_gen(unique_kmers, fp_rate):
                     'expected_memory_usage')
 
     for fp_rate in range(1, 10):
-        Z, H, M, f = estimate_optimal_with_N_and_f(unique_kmers, fp_rate/10.0)
-        to_print.append('{:11.3f}\t{:19}\t{:17e}\t{:21e}'.format(f, Z, H, M))
+        num_tables, table_size, mem_cap, fp_rate = \
+            optimal_size(unique_kmers, fp_rate=fp_rate / 10.0)
+        to_print.append('{:11.3f}\t{:19}\t{:17e}\t{:21e}'.format(fp_rate,
+                                                                 num_tables,
+                                                                 table_size,
+                                                                 mem_cap))
 
     mem_list = [1, 5, 10, 20, 50, 100, 200, 300, 400, 500, 1000, 2000, 5000]
 
@@ -74,15 +103,61 @@ def optimal_args_output_gen(unique_kmers, fp_rate):
                     'size_hashtable(H)\texpected_fp')
 
     for mem in mem_list:
-        Z, H, M, f = estimate_optimal_with_N_and_M(unique_kmers,
-                                                   mem*1000000000)
-        to_print.append('{:21e}\t{:19}\t{:17e}\t{:11.3f}'.format(M, Z, H, f))
+        num_tables, table_size, mem_cap, fp_rate =\
+            optimal_size(unique_kmers, mem_cap=mem * 1000000000)
+        to_print.append('{:21e}\t{:19}\t{:17e}\t{:11.3f}'.format(mem_cap,
+                                                                 num_tables,
+                                                                 table_size,
+                                                                 fp_rate))
     return "\n".join(to_print)
 
 
+def do_sanity_checking(args, desired_max_fp):
+    """
+    Check whether the restrictions in the args (if there are any) make
+    sense; if not, complain. If no restrictions are given, add some that
+    make sense.
+    Takes in args and the desired maximum false positive rate.
+    """
+    # if optimization args are given, do optimization
+    if args.unique_kmers != 0:
+        if args.max_memory_usage:
+            # verify that this is a sane memory usage restriction
+            res = estimate_optimal_with_K_and_M(args.unique_kmers,
+                                                args.max_memory_usage)
+            if res.fp_rate > desired_max_fp:
+                print("""
+*** ERROR: The given restrictions yield an estimated false positive rate of {0},
+*** which is above the recommended false positive ceiling of {1}!"""
+                      .format(res.fp_rate, desired_max_fp), file=sys.stderr)
+                if not args.force:
+                    print("NOTE: This can be overridden using the --force"
+                          " argument", file=sys.stderr)
+                    print("*** Aborting...!", file=sys.stderr)
+                    sys.exit(1)
+        else:
+            res = estimate_optimal_with_K_and_f(args.unique_kmers,
+                                                desired_max_fp)
+            if args.max_tablesize and args.max_tablesize < res.htable_size:
+                print("*** Warning: The given tablesize is too small!",
+                      file=sys.stderr)
+                print("*** Estimating false positive rate to be {0}".format(
+                    res.fp_rate), file=sys.stderr)
+            else:
+                print("*** INFO: set memory ceiling using auto optimization.",
+                      file=sys.stderr)
+                print("*** Ceiling is: {0} bytes\n".format(res.mem_use),
+                      file=sys.stderr)
+                args.max_mem = res.mem_use
+
+    return args
+
+
 def build_graph(ifilenames, graph, num_threads=1, tags=False):
     """
     Algorithm to construct a counting graph from a set of input files
+    Takes in a list of input files and an existing graph;
+    optionally, the number of threads and whether tags should be generated.
     """
 
     if tags:
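For callers, the net effect of this refactor is a single keyword-driven entry
point in place of the two ``N``/``M`` helpers. A usage sketch (numbers
hypothetical)::

    from oxli.functions import optimal_size

    # memory-capped sizing; returns (num_htables, htable_size, mem_use, fp_rate)
    res = optimal_size(int(1e9), mem_cap=int(4e9))

    # fp-capped sizing: choose tables for a 1% false positive ceiling instead
    res = optimal_size(int(1e9), fp_rate=0.01)
    print(res.num_htables, res.htable_size, res.mem_use, res.fp_rate)

    # passing both caps, or neither, raises TypeError per the guard above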
diff --git a/sandbox/Makefile.read_aligner_training b/sandbox/Makefile.read_aligner_training
new file mode 100644
index 0000000..2c2419c
--- /dev/null
+++ b/sandbox/Makefile.read_aligner_training
@@ -0,0 +1,26 @@
+KHMER= ../khmer
+
+KSIZE= 30
+HASH_SIZE= 4e8
+N_HASHES= 4
+
+all: estimated_probabilities.${KSIZE}.json
+
+mockRefG.fa:
+	wget https://github.com/dib-lab/khmer-testdata/raw/master/mockRefG.fa
+
+%.fastq.gz:
+	wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR172/SRR172903/SRR172903.fastq.gz
+	wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR172/SRR172902/SRR172902.fastq.gz
+
+combined_reads.ht: SRR172902.fastq.gz SRR172903.fastq.gz
+	python $(KHMER)/scripts/load-into-counting.py --ksize $(KSIZE) -x $(HASH_SIZE) -N $(N_HASHES) $@ $^
+
+mockRefG.1.bt2: mockRefG.fa
+	bowtie2-build mockRefG.fa mockRefG
+
+combined_reads_mapping.bam: SRR172902.fastq.gz SRR172903.fastq.gz mockRefG.1.bt2
+	bowtie2 -x mockRefG -U SRR172902.fastq.gz,SRR172903.fastq.gz | samtools view -S -F4 -b - > combined_reads_mapping.bam
+
+estimated_probabilities.${KSIZE}.json: combined_reads.ht combined_reads_mapping.bam
+	python readaligner_pairhmm_train.py --json combined_reads.ht combined_reads_mapping.bam > estimated_probabilities.${KSIZE}.json
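Assuming GNU make plus wget, bowtie2, and samtools are on PATH and a khmer
checkout sits at ``../khmer``, the pipeline above should run end to end with
``make -f Makefile.read_aligner_training`` from the sandbox directory, leaving
the trained parameters in ``estimated_probabilities.30.json`` (30 being the
default KSIZE).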
diff --git a/sandbox/README.rst b/sandbox/README.rst
index 48af420..36ffa88 100644
--- a/sandbox/README.rst
+++ b/sandbox/README.rst
@@ -10,9 +10,11 @@ We are still triaging and documenting the various scripts.
 
 ----
 
-Awaiting promotion to sandbox:
+Awaiting promotion to scripts:
 
 * calc-error-profile.py - calculate a per-base "error profile" for shotgun sequencing data, w/o a reference. (Used/tested in `2014 paper on semi-streaming algorithms <https://github.com/ged-lab/2014-streaming/blob/master/>`__)
+* count-kmers.py - output k-mer counts for multiple input files.
+* count-kmers-single.py - output k-mer counts for a single sequence file.
 * correct-errors.py - streaming error correction.
 * unique-kmers.py - estimate the number of k-mers present in a file with the HyperLogLog low-memory probabilistic cardinality estimation algorithm.
 
diff --git a/sandbox/build-sparse-graph.py b/sandbox/build-sparse-graph.py
index 6686d73..1a12b3b 100755
--- a/sandbox/build-sparse-graph.py
+++ b/sandbox/build-sparse-graph.py
@@ -19,7 +19,7 @@ def main():
     input_fasta = sys.argv[3]
     K = int(sys.argv[1])
     x = float(sys.argv[2])
-    
+
     ht = khmer.Hashbits(K, x, 4)
 
     sparse_graph = gt.Graph()
diff --git a/sandbox/calc-best-assembly.py b/sandbox/calc-best-assembly.py
index 931a24d..e370647 100755
--- a/sandbox/calc-best-assembly.py
+++ b/sandbox/calc-best-assembly.py
@@ -42,22 +42,23 @@ def main():
         stats.append((total, filename))
 
         if not args.quiet:
-            print("assembly %s has %d bp > %d" % (filename,
-                                                                total,
-                                                                args.cutoff), file=sys.stderr)
+            print("assembly %s has %d bp > %d" % (filename, total,
+                                                  args.cutoff),
+                  file=sys.stderr)
 
     stats.sort(reverse=True)
 
     best_total, winner_file = stats[0]
     print('----', file=sys.stderr)
     print("assembly %s wins: %d total bp > %d" % (winner_file,
-                                                                best_total,
-                                                                args.cutoff), file=sys.stderr)
+                                                  best_total,
+                                                  args.cutoff),
+          file=sys.stderr)
 
     if args.output_file:
-        for record in screed.open(winner_file, parse_description=False):
-            print('>%s\n%s' % (record.name,
-                                                   record.sequence), file=args.output_file)
+        for record in screed.open(winner_file):
+            print('>%s\n%s' % (record.name, record.sequence),
+                  file=args.output_file)
 
     print(winner_file)
 
diff --git a/sandbox/collect-reads.py b/sandbox/collect-reads.py
index f02c0ea..ca29727 100755
--- a/sandbox/collect-reads.py
+++ b/sandbox/collect-reads.py
@@ -21,7 +21,8 @@ import sys
 import textwrap
 import khmer
 from khmer import khmer_args
-from khmer.khmer_args import build_counting_args, report_on_config, info
+from khmer.khmer_args import (build_counting_args, report_on_config, info,
+                              calculate_tablesize)
 from khmer.kfile import check_input_files, check_space
 from khmer.kfile import check_space_for_hashtable
 import argparse
@@ -54,7 +55,7 @@ def get_parser():
                         "sequence files.")
     parser.add_argument('--report-total-kmers', '-t', action='store_true',
                         help="Prints the total number of k-mers to stderr")
-    parser.add_argument('-C', '--coverage', type=int,
+    parser.add_argument('-C', '--coverage', type=int, default=50,
                         help='Collect reads until this coverage, then exit.')
     parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                         help='Write collect reads into this file.')
@@ -77,7 +78,9 @@ def main():
         check_input_files(name, False)
 
     check_space(args.input_sequence_filename, False)
-    check_space_for_hashtable(args, 'countgraph', False)
+    tablesize = calculate_tablesize(args, 'countgraph')
+    check_space_for_hashtable(args.output_countingtable_filename, tablesize,
+                              False)
 
     print('Saving k-mer counting table to %s' % base)
     print('Loading sequences from %s' % repr(filenames))
diff --git a/sandbox/collect-variants.py b/sandbox/collect-variants.py
index db368f0..57af85d 100755
--- a/sandbox/collect-variants.py
+++ b/sandbox/collect-variants.py
@@ -46,15 +46,15 @@ def main():
         print(' - kmer size =    %d \t\t(-k)' % args.ksize, file=sys.stderr)
         print(' - n hashes =     %d \t\t(-N)' % args.n_tables, file=sys.stderr)
         print(' - min hashsize = %-5.2g \t(-x)' % \
-            args.min_tablesize, file=sys.stderr)
+            args.max_tablesize, file=sys.stderr)
         print('', file=sys.stderr)
         print('Estimated memory usage is %.2g bytes ' \
             '(n_hashes x min_hashsize)' % \
-            (args.n_tables * args.min_tablesize), file=sys.stderr)
+            (args.n_tables * args.max_tablesize), file=sys.stderr)
         print('-' * 8, file=sys.stderr)
 
     K = args.ksize
-    HT_SIZE = args.min_tablesize
+    HT_SIZE = args.max_tablesize
     N_HT = args.n_tables
 
     filenames = args.input_filenames
diff --git a/sandbox/correct-errors.py b/sandbox/correct-errors.py
deleted file mode 100755
index 71c6890..0000000
--- a/sandbox/correct-errors.py
+++ /dev/null
@@ -1,219 +0,0 @@
-#! /usr/bin/env python
-#
-# This file is part of khmer, https://github.com/dib-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2015. It is licensed under
-# the three-clause BSD license; see LICENSE.
-# Contact: khmer-project at idyll.org
-#
-"""
-Streaming error correction.
-
-% python sandbox/correct-errors.py [ <data1> [ <data2> [ ... ] ] ]
-
-Use -h for parameter help.
-
-TODO: paired support: paired reads should be kept together.
-TODO: load/save counting table.
-TODO: move output_single elsewhere
-TODO: add to sandbox/README
-TODO: change name to correct-reads?
-"""
-from __future__ import print_function
-import sys
-import screed
-import os
-import khmer
-import argparse
-import tempfile
-import shutil
-
-DEFAULT_NORMALIZE_LIMIT = 20
-DEFAULT_CUTOFF = 2
-
-DEFAULT_K = 32
-DEFAULT_N_HT = 4
-DEFAULT_MIN_HASHSIZE = 1e6
-
-
-def output_single(read, new_sequence):
-    name = read.name
-    sequence = new_sequence
-
-    quality = None
-    if hasattr(read, 'quality'):
-        quality = read.quality[:len(sequence)]
-
-        # in cases where sequence _lengthened_, need to truncate it to
-        # match the quality score length.
-        sequence = sequence[:len(quality)]
-
-    if quality:
-        assert len(sequence) == len(quality), (sequence, quality)
-        return "@%s\n%s\n+\n%s\n" % (name, sequence, quality)
-    else:
-        return ">%s\n%s\n" % (name, sequence)
-
-
-def main():
-    parser = argparse.ArgumentParser(description='XXX')
-
-    env_ksize = os.environ.get('KHMER_KSIZE', DEFAULT_K)
-    env_n_hashes = os.environ.get('KHMER_N_HASHES', DEFAULT_N_HT)
-    env_hashsize = os.environ.get('KHMER_MIN_HASHSIZE', DEFAULT_MIN_HASHSIZE)
-
-    parser.add_argument('--ksize', '-k', type=int, dest='ksize',
-                        default=env_ksize,
-                        help='k-mer size to use')
-    parser.add_argument('--n_hashes', '-N', type=int, dest='n_hashes',
-                        default=env_n_hashes,
-                        help='number of hash tables to use')
-    parser.add_argument('--hashsize', '-x', type=float, dest='min_hashsize',
-                        default=env_hashsize,
-                        help='lower bound on hashsize to use')
-
-    parser.add_argument("--trusted-cov", dest="trusted_cov", type=int,
-                        default=DEFAULT_CUTOFF)
-    parser.add_argument("--theta", dest="bits_theta", type=float, default=1.0)
-
-    parser.add_argument('--normalize-to', '-Z', type=int, dest='normalize_to',
-                        help='base cutoff on median k-mer abundance of this',
-                        default=DEFAULT_NORMALIZE_LIMIT)
-
-    parser.add_argument('--tempdir', '-T', type=str, dest='tempdir',
-                        default='./')
-
-    parser.add_argument('input_filenames', nargs='+')
-    args = parser.parse_args()
-
-    K = args.ksize
-    HT_SIZE = args.min_hashsize
-    N_HT = args.n_hashes
-
-    NORMALIZE_LIMIT = args.normalize_to
-
-    print('making hashtable')
-    ht = khmer.CountingHash(K, HT_SIZE, N_HT)
-
-    aligner = khmer.ReadAligner(ht, args.trusted_cov, args.bits_theta)
-
-    tempdir = tempfile.mkdtemp('khmer', 'tmp', args.tempdir)
-    print('created temporary directory %s; use -T to change location' % tempdir)
-
-    ###
-
-    save_pass2 = 0
-    n_aligned = 0
-    n_corrected = 0
-    total_reads = 0
-
-    pass2list = []
-    for filename in args.input_filenames:
-        pass2filename = os.path.basename(filename) + '.pass2'
-        pass2filename = os.path.join(tempdir, pass2filename)
-        corrfilename = os.path.basename(filename) + '.corr'
-
-        pass2list.append((filename, pass2filename, corrfilename))
-
-        pass2fp = open(pass2filename, 'w')
-        corrfp = open(corrfilename, 'w')
-
-        for n, read in enumerate(screed.open(filename)):
-            total_reads += 1
-
-            if n % 10000 == 0:
-                print('...', n, filename, n_aligned, n_corrected, save_pass2, \
-                      total_reads)
-            seq = read.sequence.replace('N', 'A')
-
-            # build the alignment...
-            score, graph_alignment, read_alignment, truncated = \
-                aligner.align(read.sequence)
-
-            # next, decide whether or to keep it.
-            output_corrected = False
-            if not truncated:
-                n_aligned += 1
-
-                # build a better sequence -- this is the corrected one.
-                if True:
-                    graph_seq = graph_alignment.replace("-", "")
-                else:
-                    graph_seq = ""
-                    for i in range(len(graph_alignment)):
-                        if graph_alignment[i] == "-":
-                            graph_seq += read_alignment[i]
-                        else:
-                            graph_seq += graph_alignment[i]
-
-                corrected = graph_seq
-                if graph_seq != read.sequence:
-                    n_corrected += 1
-
-                # get the minimum count for this new sequence
-                mincount = ht.get_min_count(graph_seq)
-                if mincount < args.normalize_to:
-                    output_corrected = True
-
-            # has this portion of the graph saturated? if not,
-            # consume & save => pass2.
-            if output_corrected:
-                corrfp.write(output_single(read, corrected))
-            else:  # uncorrected...
-                ht.consume(read.sequence)
-                pass2fp.write(output_single(read, read.sequence))
-                save_pass2 += 1
-
-        pass2fp.close()
-        corrfp.close()
-
-        print('%s: kept aside %d of %d from first pass, in %s' % \
-              (filename, save_pass2, n, filename))
-        print('aligned %d of %d reads so far' % (n_aligned, total_reads))
-        print('changed %d of %d reads so far' % (n_corrected, total_reads))
-
-    for orig_filename, pass2filename, corrfilename in pass2list:
-        print('second pass: looking at sequences kept aside in %s' % \
-              pass2filename)
-        for n, read in enumerate(screed.open(pass2filename)):
-            if n % 10000 == 0:
-                print('... x 2', n, pass2filename, n_aligned, n_corrected, \
-                      total_reads)
-
-            corrfp = open(corrfilename, 'a')
-
-            # build the alignment...
-            score, graph_alignment, read_alignment, truncated = \
-                aligner.align(read.sequence)
-
-            if truncated:               # no good alignment; output original
-                corrected = read.sequence
-            else:
-                n_aligned += 1
-                # build a better sequence -- this is the corrected one.
-                if True:
-                    graph_seq = graph_alignment.replace("-", "")
-                else:
-                    graph_seq = ""
-                    for i in range(len(graph_alignment)):
-                        if graph_alignment[i] == "-":
-                            graph_seq += read_alignment[i]
-                        else:
-                            graph_seq += graph_alignment[i]
-
-                corrected = graph_seq
-                if corrected != read.sequence:
-                    n_corrected += 1
-
-            corrfp.write(output_single(read, corrected))
-
-        print('removing %s' % pass2filename)
-        os.unlink(pass2filename)
-
-    print('removing temp directory & contents (%s)' % tempdir)
-    shutil.rmtree(tempdir)
-
-    print('Aligned %d of %d total' % (n_aligned, total_reads))
-    print('Changed %d of %d total' % (n_corrected, total_reads))
-
-if __name__ == '__main__':
-    main()
diff --git a/scripts/trim-low-abund.py b/sandbox/correct-reads.py
similarity index 52%
copy from scripts/trim-low-abund.py
copy to sandbox/correct-reads.py
index 741b181..a86f9be 100755
--- a/scripts/trim-low-abund.py
+++ b/sandbox/correct-reads.py
@@ -1,20 +1,21 @@
-#! /usr/bin/env python
+#! /usr/bin/env python2
 #
-# This file is part of khmer, https://github.com/dib-lab/khmer/, and is
+# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2015. It is licensed under
 # the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 """
-Trim sequences at k-mers of the given abundance, using a streaming algorithm.
+Semi-streaming error correction.
 
-Output sequences will be placed in 'infile.abundtrim'.
+Output sequences will be placed in 'infile.corr'.
 
-% python scripts/trim-low-abund.py [ <data1> [ <data2> [ ... ] ] ]
+% python sandbox/correct-reads.py [ <data1> [ <data2> [ ... ] ] ]
 
 Use -h for parameter help.
+
+TODO: add to sandbox/README.
 """
-from __future__ import print_function
 import sys
 import screed
 import os
@@ -24,9 +25,6 @@ import shutil
 import textwrap
 import argparse
 
-from screed import Record
-from khmer import khmer_args
-
 from khmer.khmer_args import (build_counting_args, info, add_loadhash_args,
                               report_on_config)
 from khmer.utils import write_record, write_record_pair, broken_paired_reader
@@ -37,46 +35,52 @@ DEFAULT_NORMALIZE_LIMIT = 20
 DEFAULT_CUTOFF = 2
 
 
-def trim_record(read, trim_at):
-    new_read = Record()
-    new_read.name = read.name
-    new_read.sequence = read.sequence[:trim_at]
-    if hasattr(read, 'quality'):
-        new_read.quality = read.quality[:trim_at]
+def correct_sequence(aligner, sequence):
+    # align to graph.
+    score, graph_alignment, read_alignment, truncated = \
+           aligner.align(sequence)
+
+    # next, decide whether or not to keep it.
+    output_corrected = False
+    if not truncated:
+        graph_seq = graph_alignment.replace("-", "")
+        return True, graph_seq
+
+    return False, sequence
+
+
+def fix_quality(record):
+    if len(record.sequence) < len(record.quality):
+        record.quality = record.quality[:len(record.sequence)]
 
-    return new_read
+    while len(record.sequence) > len(record.quality):
+        record.quality += 'I' # @CTB hack
 
 
 def get_parser():
     epilog = """
-    The output is one file for each input file, <input file>.abundtrim, placed
-    in the current directory.  This output contains the input sequences
-    trimmed at low-abundance k-mers.
-
-    The ``-V/--variable-coverage`` parameter will, if specified,
-    prevent elimination of low-abundance reads by only trimming
-    low-abundance k-mers from high-abundance reads; use this for
-    non-genomic data sets that may have variable coverage.
-
-    Note that the output reads will not necessarily be in the same order
-    as the reads in the input files; if this is an important consideration,
-    use ``load-into-counting.py`` and ``filter-abund.py``.  However, read
-    pairs will be kept together, in "broken-paired" format; you can use
+    The output is one file for each input file, <input file>.corr, placed
+    in the current directory.  This output contains the input sequences,
+    corrected at low-abundance k-mers.
+
+    Note that the output reads will not necessarily be in the same
+    order as the reads in the input files. However, read pairs will be
+    kept together, in "broken-paired" format; you can use
     ``extract-paired-reads.py`` to extract read pairs and orphans.
 
     Example::
 
-        trim-low-abund.py -x 5e7 -k 20 -C 2 data/100k-filtered.fa
+        correct-reads.py -x 5e7 -k 20 -C 2 data/100k-filtered.fa
     """
 
     parser = build_counting_args(
-        descr='Trim low-abundance k-mers using a streaming algorithm.',
+        descr='Correct reads using a semi-streaming algorithm.',
         epilog=textwrap.dedent(epilog))
 
     parser.add_argument('input_filenames', nargs='+')
 
     parser.add_argument('--cutoff', '-C', type=int,
-                        help='remove k-mers below this abundance',
+                        help='k-mers below this abundance are not trusted',
                         default=DEFAULT_CUTOFF)
 
     parser.add_argument('--normalize-to', '-Z', type=int,
@@ -92,8 +96,7 @@ def get_parser():
 
     parser.add_argument('--variable-coverage', '-V', action='store_true',
                         default=False,
-                        help='Only trim low-abundance k-mers from sequences '
-                        'that have high coverage.')
+                        help='Only correct sequences that have high coverage.')
 
     add_loadhash_args(parser)
     parser.add_argument('-s', '--savetable', metavar="filename", default='',
@@ -104,20 +107,21 @@ def get_parser():
     parser.add_argument('--force', default=False, action='store_true')
     parser.add_argument('--ignore-pairs', default=False, action='store_true')
     parser.add_argument('--tempdir', '-T', type=str, default='./')
+    parser.add_argument("--theta", dest="bits_theta", type=float, default=1.0)
 
     return parser
 
 
 def main():
-    info('trim-low-abund.py', ['streaming'])
+    info('correct-reads.py', ['streaming'])
     parser = get_parser()
     args = parser.parse_args()
 
     ###
 
     if len(set(args.input_filenames)) != len(args.input_filenames):
-        print("Error: Cannot input the same filename multiple times.",
-              file=sys.stderr)
+        print >>sys.stderr, \
+            "Error: Cannot input the same filename multiple times."
         sys.exit(1)
 
     ###
@@ -126,22 +130,26 @@ def main():
     check_valid_file_exists(args.input_filenames)
     check_space(args.input_filenames, args.force)
     if args.savetable:
-        check_space_for_hashtable(args, 'countgraph', args.force)
+        check_space_for_hashtable(
+            args.savetable, args.n_tables * args.min_tablesize, args.force)
 
-    if args.loadtable:
-        print('loading countgraph from', args.loadtable, file=sys.stderr)
-        ct = khmer.load_counting_hash(args.loadtable)
-    else:
-        print('making countgraph', file=sys.stderr)
-        ct = khmer_args.create_countgraph(args)
+    K = args.ksize
 
-    K = ct.ksize()
     CUTOFF = args.cutoff
     NORMALIZE_LIMIT = args.normalize_to
 
+    if args.loadtable:
+        print >>sys.stderr, 'loading k-mer counting table from', args.loadtable
+        ct = khmer.load_counting_hash(args.loadtable)
+    else:
+        print >>sys.stderr, 'making k-mer counting table'
+        ct = khmer.new_counting_hash(K, args.min_tablesize, args.n_tables)
+
     tempdir = tempfile.mkdtemp('khmer', 'tmp', args.tempdir)
-    print('created temporary directory %s; '
-          'use -T to change location' % tempdir, file=sys.stderr)
+    print >>sys.stderr, 'created temporary directory %s; ' \
+                        'use -T to change location' % tempdir
+
+    aligner = khmer.ReadAligner(ct, args.cutoff, args.bits_theta)
 
     # ### FIRST PASS ###
 
@@ -151,18 +159,18 @@ def main():
     n_reads = 0
     written_bp = 0
     written_reads = 0
-    trimmed_reads = 0
+    corrected_reads = 0
 
     pass2list = []
     for filename in args.input_filenames:
         pass2filename = os.path.basename(filename) + '.pass2'
         pass2filename = os.path.join(tempdir, pass2filename)
         if args.out is None:
-            trimfp = open(os.path.basename(filename) + '.abundtrim', 'w')
+            corrfp = open(os.path.basename(filename) + '.corr', 'w')
         else:
-            trimfp = args.out
+            corrfp = args.out
 
-        pass2list.append((filename, pass2filename, trimfp))
+        pass2list.append((filename, pass2filename, corrfp))
 
         screed_iter = screed.open(filename, parse_description=False)
         pass2fp = open(pass2filename, 'w')
@@ -174,8 +182,8 @@ def main():
                                            force_single=args.ignore_pairs)
         for n, is_pair, read1, read2 in paired_iter:
             if n % 10000 == 0:
-                print('...', n, filename, save_pass2, n_reads, n_bp,
-                      written_reads, written_bp, file=sys.stderr)
+                print >>sys.stderr, '...', n, filename, save_pass2, \
+                    n_reads, n_bp, written_reads, written_bp
 
             # we want to track paired reads here, to make sure that pairs
             # are not split between first pass and second pass.
@@ -196,23 +204,26 @@ def main():
                     write_record_pair(read1, read2, pass2fp)
                     save_pass2 += 2
                 else:
-                    _, trim_at1 = ct.trim_on_abundance(seq1, CUTOFF)
-                    _, trim_at2 = ct.trim_on_abundance(seq2, CUTOFF)
-
-                    if trim_at1 >= K:
-                        read1 = trim_record(read1, trim_at1)
-
-                    if trim_at2 >= K:
-                        read2 = trim_record(read2, trim_at2)
-
-                    if trim_at1 != len(seq1):
-                        trimmed_reads += 1
-                    if trim_at2 != len(seq2):
-                        trimmed_reads += 1
-
-                    write_record_pair(read1, read2, trimfp)
+                    is_aligned, new_seq1 = correct_sequence(aligner, seq1)
+                    if is_aligned:
+                        if new_seq1 != read1.sequence:
+                            corrected_reads += 1
+                        read1.sequence = new_seq1
+                        if hasattr(read1, 'quality'):
+                            fix_quality(read1)
+
+                    is_aligned, new_seq2 = correct_sequence(aligner, seq2)
+                    if is_aligned:
+                        if new_seq2 != read2.sequence:
+                            corrected_reads += 1
+                        read2.sequence = new_seq2
+                        if hasattr(read2, 'quality'):
+                            fix_quality(read2)
+
+                    write_record_pair(read1, read2, corrfp)
                     written_reads += 2
-                    written_bp += trim_at1 + trim_at2
+                    written_bp += len(read1)
+                    written_bp += len(read2)
             else:
                 n_reads += 1
                 n_bp += len(read1.sequence)
@@ -228,32 +239,32 @@ def main():
                     write_record(read1, pass2fp)
                     save_pass2 += 1
                 else:                       # trim!!
-                    _, trim_at = ct.trim_on_abundance(seq, CUTOFF)
-                    if trim_at >= K:
-                        new_read = trim_record(read1, trim_at)
-                        write_record(new_read, trimfp)
+                    is_aligned, new_seq = correct_sequence(aligner, seq)
+                    if is_aligned:
+                        if new_seq != read1.sequence:
+                            corrected_reads += 1
+                        read1.sequence = new_seq
+                        if hasattr(read1, 'quality'):
+                            fix_quality(read1)
 
-                        written_reads += 1
-                        written_bp += trim_at
+                        write_record(read1, corrfp)
 
-                        if trim_at != len(read1.sequence):
-                            trimmed_reads += 1
+                        written_reads += 1
+                        written_bp += len(new_seq)
 
         pass2fp.close()
 
-        print('%s: kept aside %d of %d from first pass, in %s' %
-              (filename, save_pass2, n, filename),
-              file=sys.stderr)
+        print >>sys.stderr, '%s: kept aside %d of %d from first pass, in %s' \
+            % (filename, save_pass2, n, filename)
         save_pass2_total += save_pass2
 
     # ### SECOND PASS. ###
 
     skipped_n = 0
     skipped_bp = 0
-    for _, pass2filename, trimfp in pass2list:
-        print('second pass: looking at sequences kept aside in %s' %
-              pass2filename,
-              file=sys.stderr)
+    for _, pass2filename, corrfp in pass2list:
+        print >>sys.stderr, ('second pass: looking at sequences kept aside '
+                             'in %s') % pass2filename
 
         # note that for this second pass, we don't care about paired
         # reads - they will be output in the same order they're read in,
@@ -263,73 +274,72 @@ def main():
         for n, read in enumerate(screed.open(pass2filename,
                                              parse_description=False)):
             if n % 10000 == 0:
-                print('... x 2', n, pass2filename,
-                      written_reads, written_bp, file=sys.stderr)
+                print >>sys.stderr, '... x 2', n, pass2filename, \
+                    written_reads, written_bp
 
             seq = read.sequence.replace('N', 'A')
             med, _, _ = ct.get_median_count(seq)
 
             # do we retain low-abundance components unchanged?
             if med < NORMALIZE_LIMIT and args.variable_coverage:
-                write_record(read, trimfp)
+                write_record(read, corrfp)
 
                 written_reads += 1
                 written_bp += len(read.sequence)
                 skipped_n += 1
                 skipped_bp += len(read.sequence)
 
-            # otherwise, examine/trim/truncate.
+            # otherwise, examine/correct.
             else:    # med >= NORMALIZE LIMIT or not args.variable_coverage
-                _, trim_at = ct.trim_on_abundance(seq, CUTOFF)
-                if trim_at >= K:
-                    new_read = trim_record(read, trim_at)
-                    write_record(new_read, trimfp)
+                is_aligned, new_seq = correct_sequence(aligner, seq)
+                if is_aligned:
+                    if new_seq != read.sequence:
+                        corrected_reads += 1
+                    read.sequence = new_seq
+                    if hasattr(read, 'quality'):
+                        fix_quality(read)
+                    write_record(read, corrfp)
 
                     written_reads += 1
-                    written_bp += trim_at
-
-                    if trim_at != len(read.sequence):
-                        trimmed_reads += 1
+                    written_bp += len(new_seq)
 
-        print('removing %s' % pass2filename, file=sys.stderr)
+        print >>sys.stderr, 'removing %s' % pass2filename
         os.unlink(pass2filename)
 
-    print('removing temp directory & contents (%s)' % tempdir, file=sys.stderr)
+    print >>sys.stderr, 'removing temp directory & contents (%s)' % tempdir
     shutil.rmtree(tempdir)
 
     n_passes = 1.0 + (float(save_pass2_total) / n_reads)
-    percent_reads_trimmed = float(trimmed_reads + (n_reads - written_reads)) /\
+    percent_reads_corrected = float(corrected_reads +
+                                    (n_reads - written_reads)) /\
         n_reads * 100.0
 
-    print('read %d reads, %d bp' % (n_reads, n_bp,))
-    print('wrote %d reads, %d bp' % (written_reads, written_bp,))
-    print('looked at %d reads twice (%.2f passes)' % (save_pass2_total,
-                                                      n_passes))
-    print('removed %d reads and trimmed %d reads (%.2f%%)' %
-          (n_reads - written_reads, trimmed_reads, percent_reads_trimmed))
-    print('trimmed or removed %.2f%% of bases (%d total)' %
-          ((1 - (written_bp / float(n_bp))) * 100.0, n_bp - written_bp))
+    print >>sys.stderr, 'read %d reads, %d bp' % (n_reads, n_bp,)
+    print >>sys.stderr, 'wrote %d reads, %d bp' % (written_reads, written_bp,)
+    print >>sys.stderr, 'looked at %d reads twice (%.2f passes)' % \
+        (save_pass2_total, n_passes)
+    print >>sys.stderr, 'removed %d reads and corrected %d reads (%.2f%%)' % \
+        (n_reads - written_reads, corrected_reads, percent_reads_corrected)
+    print >>sys.stderr, 'removed %.2f%% of bases (%d total)' % \
+        ((1 - (written_bp / float(n_bp))) * 100.0, n_bp - written_bp)
 
     if args.variable_coverage:
         percent_reads_hicov = 100.0 * float(n_reads - skipped_n) / n_reads
-        print('%d reads were high coverage (%.2f%%);' % (n_reads - skipped_n,
-                                                         percent_reads_hicov),
-              file=sys.stderr)
-        print('skipped %d reads/%d bases because of low coverage' %
-              (skipped_n, skipped_bp),
-              file=sys.stderr)
+        print >>sys.stderr, '%d reads were high coverage (%.2f%%);' % \
+            (n_reads - skipped_n, percent_reads_hicov)
+        print >>sys.stderr, ('skipped %d reads/%d bases because of low '
+                             'coverage') % (skipped_n, skipped_bp)
 
     fp_rate = \
         khmer.calc_expected_collisions(ct, args.force, max_false_pos=.8)
     # for max_false_pos see Zhang et al., http://arxiv.org/abs/1309.2975
-    print('fp rate estimated to be {fpr:1.3f}'.format(fpr=fp_rate),
-          file=sys.stderr)
+    print >>sys.stderr, \
+        'fp rate estimated to be {fpr:1.3f}'.format(fpr=fp_rate)
 
-    print('output in *.abundtrim', file=sys.stderr)
+    print >>sys.stderr, 'output in *.corr'
 
     if args.savetable:
-        print("Saving k-mer counting table to",
-              args.savetable, file=sys.stderr)
+        print >>sys.stderr, "Saving k-mer counting table to", args.savetable
         ct.save(args.savetable)
 
 
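The alignment step that replaces ``trim_on_abundance`` is the heart of this
script. A minimal standalone sketch of the same pattern, using only calls that
appear in this diff (file name and parameters hypothetical)::

    import khmer
    import screed

    ct = khmer.new_counting_hash(20, int(5e7), 4)     # k, tablesize, n_tables
    for read in screed.open('reads.fq'):              # first pass: load counts
        ct.consume(read.sequence.replace('N', 'A'))

    aligner = khmer.ReadAligner(ct, 2, 1.0)           # trusted cov, bits_theta
    for read in screed.open('reads.fq'):
        score, graph_aln, read_aln, truncated = aligner.align(
            read.sequence.replace('N', 'A'))
        if not truncated:                             # alignment spans the read
            corrected = graph_aln.replace('-', '')    # gap-free graph sequence

In the real script the first pass interleaves correction with saturation
tracking, so most reads are seen only once; only the reads set aside in the
``.pass2`` files get a second look.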
diff --git a/sandbox/count-kmers-single.py b/sandbox/count-kmers-single.py
new file mode 100755
index 0000000..7cb49c1
--- /dev/null
+++ b/sandbox/count-kmers-single.py
@@ -0,0 +1,103 @@
+#! /usr/bin/env python2
+#
+# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+# Copyright (C) University of California, Davis, 2015. It is licensed under
+# the three-clause BSD license; see doc/LICENSE.txt.
+# Contact: khmer-project at idyll.org
+#
+# pylint: disable=missing-docstring,invalid-name
+"""
+Produce k-mer counts for all the k-mers in the given sequence file,
+building a k-mer counting table on the fly.
+
+% python sandbox/count-kmers-single.py <fasta/fastq>
+
+Use '-h' for parameter help.
+"""
+from __future__ import print_function
+
+import sys
+import khmer
+import argparse
+import screed
+import csv
+from khmer.khmer_args import (build_counting_args, report_on_config, info,
+                              add_threading_args)
+from khmer.kfile import (check_input_files, check_space,
+                         check_space_for_hashtable)
+import threading
+
+
+def get_parser():
+    parser = build_counting_args(
+        descr="Output abundances of the k-mers in the sequence file.")
+    add_threading_args(parser)
+
+    parser.add_argument('input_sequence_filename', help='The input'
+                        ' FAST[AQ] sequence file.')
+
+    parser.add_argument('-o', '--out', metavar="output_file",
+                        dest='output_file',
+                        type=argparse.FileType('w'),
+                        default=None, help='output counts to this file')
+
+    return parser
+
+
+def main():
+    info('count-kmers-single.py', ['counting'])
+    args = get_parser().parse_args()
+
+    check_input_files(args.input_sequence_filename, False)
+
+    print('making k-mer counting table', file=sys.stderr)
+    counting_hash = khmer.CountingHash(args.ksize, args.max_tablesize,
+                                       args.n_tables)
+    # @CTB counting_hash.set_use_bigcount(args.bigcount)
+
+    kmer_size = counting_hash.ksize()
+    hashsizes = counting_hash.hashsizes()
+    tracking = khmer._Hashbits(  # pylint: disable=protected-access
+        kmer_size, hashsizes)
+
+    print('kmer_size: %s' % counting_hash.ksize(), file=sys.stderr)
+    print('k-mer counting table sizes: %s' % (counting_hash.hashsizes(),),
+          file=sys.stderr)
+
+    if args.output_file is None:
+        args.output_file = sys.stdout
+    writer = csv.writer(args.output_file)
+
+    # start loading
+    rparser = khmer.ReadParser(args.input_sequence_filename)
+    threads = []
+    print('consuming input, round 1 -- %s' % (args.input_sequence_filename),
+          file=sys.stderr)
+    for _ in range(args.threads):
+        thread = \
+            threading.Thread(
+                target=counting_hash.consume_fasta_with_reads_parser,
+                args=(rparser, )
+            )
+        threads.append(thread)
+        thread.start()
+
+    for thread in threads:
+        thread.join()
+
+    for record in screed.open(args.input_sequence_filename):
+        seq = record.sequence.replace('N', 'A')
+        for i in range(len(seq) - kmer_size + 1):
+            kmer = seq[i:i+kmer_size]
+            if not tracking.get(kmer):
+                tracking.count(kmer)
+                writer.writerow([kmer, str(counting_hash.get(kmer))])
+
+    print('Total number of unique k-mers: {0}'.format(
+        counting_hash.n_unique_kmers()), file=sys.stderr)
+
+
+if __name__ == '__main__':
+    main()
+
+# vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
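The ``tracking`` table above is a trick worth calling out: a Hashbits presence
filter shaped like the counting table is used purely so each distinct k-mer is
reported once. A toy sketch of the pattern (sequence and sizes hypothetical)::

    import khmer

    counts = khmer.CountingHash(5, int(1e6), 4)    # toy counting table, k=5
    counts.consume('ATGGATGGATGG')

    seen = khmer._Hashbits(5, counts.hashsizes())  # same-shape presence table
    for kmer in ('ATGGA', 'TGGAT', 'ATGGA'):       # note the duplicate
        if not seen.get(kmer):                     # first encounter only
            seen.count(kmer)
            print(kmer, counts.get(kmer))

Because ``seen`` is itself a Bloom-filter-style structure, a false positive
there means a k-mer can be skipped, never double-reported.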
diff --git a/sandbox/count-kmers.py b/sandbox/count-kmers.py
new file mode 100644
index 0000000..0d736da
--- /dev/null
+++ b/sandbox/count-kmers.py
@@ -0,0 +1,80 @@
+#! /usr/bin/env python2
+#
+# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+# Copyright (C) University of California, Davis, 2015. It is licensed under
+# the three-clause BSD license; see doc/LICENSE.txt.
+# Contact: khmer-project at idyll.org
+#
+# pylint: disable=missing-docstring,invalid-name
+"""
+Produce k-mer counts for all the k-mers in the given sequence file,
+using the given counting table.
+
+% python sandbox/count-kmers.py <ct> <fasta/fastq> [ <fasta/fastq> ... ]
+
+Use '-h' for parameter help.
+"""
+from __future__ import print_function
+
+import sys
+import khmer
+import argparse
+import screed
+import csv
+from khmer.khmer_args import info
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(
+        description="Output abundances of the k-mers in "
+        "the sequence files using a pre-made k-mer counting table.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument('input_counting_table_filename', help='The name of the'
+                        ' input k-mer counting table file.')
+    parser.add_argument('input_sequence_filenames', help='The input'
+                        ' FAST[AQ] sequence file(s).', nargs='+')
+
+    parser.add_argument('-o', '--out', metavar="output_file",
+                        dest='output_file',
+                        type=argparse.FileType('w'),
+                        default=None, help='output counts to this file')
+
+    return parser
+
+
+def main():
+    info('count-kmers.py', ['counting'])
+    args = get_parser().parse_args()
+
+    print('loading k-mer counting table from',
+          args.input_counting_table_filename, file=sys.stderr)
+    counting_hash = khmer.load_counting_hash(
+        args.input_counting_table_filename)
+
+    kmer_size = counting_hash.ksize()
+    hashsizes = counting_hash.hashsizes()
+    tracking = khmer._Hashbits(  # pylint: disable=protected-access
+        kmer_size, hashsizes)
+
+    if args.output_file is None:
+        args.output_file = sys.stdout
+    writer = csv.writer(args.output_file)
+
+    for filename in args.input_sequence_filenames:
+        for record in screed.open(filename):
+            seq = record.sequence.replace('N', 'A')
+            for i in range(len(seq) - kmer_size + 1):
+                kmer = seq[i:i+kmer_size]
+                if not tracking.get(kmer):
+                    tracking.count(kmer)
+                    writer.writerow([kmer, str(counting_hash.get(kmer))])
+
+    print('Total number of unique k-mers: {0}'.format(
+        counting_hash.n_unique_kmers()), file=sys.stderr)
+
+
+if __name__ == '__main__':
+    main()
+
+# vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
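A typical invocation, reusing a table built by ``load-into-counting.py``
(file names hypothetical)::

    python sandbox/count-kmers.py counts.ct reads.fa -o kmer_counts.csv

The CSV output is one ``kmer,count`` row per distinct k-mer, which is why the
same Hashbits de-duplication trick appears here as in the single-file variant.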
diff --git a/sandbox/error-correct-pass2.py b/sandbox/error-correct-pass2.py
new file mode 100755
index 0000000..466848f
--- /dev/null
+++ b/sandbox/error-correct-pass2.py
@@ -0,0 +1,94 @@
+#! /usr/bin/env python2
+#
+# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2009-2013. It is licensed under
+# the three-clause BSD license; see doc/LICENSE.txt.
+# Contact: khmer-project at idyll.org
+#
+"""
+Error correct reads based on a counting hash from a diginorm step.
+Output sequences will be put in @@@.
+
+% python sandbox/error-correct-pass2.py <counting.ct> <data1> [ <data2> <...> ]
+
+Use '-h' for parameter help.
+"""
+from __future__ import print_function
+import sys
+import screed
+import os
+import khmer
+import argparse
+
+
+###
+
+DEFAULT_CUTOFF = 2
+
+def output_single(read, new_sequence):
+    name = read.name
+    sequence = new_sequence
+
+    quality = None
+    if hasattr(read, 'quality'):
+        quality = read.quality[:len(sequence)]
+        sequence = sequence[:len(quality)]  # in case sequence _lengthened_
+
+    if quality:
+        assert len(sequence) == len(quality), (sequence, quality)
+        return "@%s\n%s\n+\n%s\n" % (name, sequence, quality)
+    else:
+        return ">%s\n%s\n" % (name, sequence)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--trusted-cov", dest="trusted_cov", type=int,
+                        default=DEFAULT_CUTOFF)
+    parser.add_argument("--theta", dest="bits_theta", type=float, default=1.0)
+    parser.add_argument('-o', '--output', dest='output_file',
+                        help="output file for histogram; defaults to "
+                             "<first filename>.errhist in cwd.",
+                        type=argparse.FileType('w'), default=None)
+
+    parser.add_argument('counts_table')
+    parser.add_argument('readfile')
+
+    args = parser.parse_args()
+
+    print('loading counts')
+    ht = khmer.load_counting_hash(args.counts_table)
+
+    aligner = khmer.ReadAligner(ht,
+                                args.trusted_cov,
+                                args.bits_theta)
+
+    print("trusted:", args.trusted_cov)
+
+    corrfp = args.output_file
+    if not corrfp:
+        outfile = os.path.basename(args.readfile) + '.corr'
+        corrfp = open(outfile, 'w')
+
+    n_corrected = 0
+    for n, read in enumerate(screed.open(args.readfile)):
+        if n % 10000 == 0:
+            print('...', n, n_corrected, file=sys.stderr)
+        seq = read.sequence.replace('N', 'A')
+
+        # build the alignment...
+        score, graph_alignment, read_alignment, truncated = \
+               aligner.align(seq)
+
+        if not truncated:
+            graph_seq = graph_alignment.replace("-", "")
+            if graph_seq != seq:
+                n_corrected += 1
+
+            seq = graph_seq
+
+        corrfp.write(output_single(read, seq))
+
+if __name__ == '__main__':
+    main()
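One subtlety in ``output_single`` above: when alignment lengthens a read, the
sequence ends up with more bases than there are quality scores, so both
strings are clipped to the shorter length to keep the FASTQ record
consistent. A toy illustration (values hypothetical)::

    sequence = 'ACGTACGTAA'              # 10 bp after correction
    quality = 'IIIIIIII'                 # 8 scores from the original read
    quality = quality[:len(sequence)]    # no-op here; handles shortened reads
    sequence = sequence[:len(quality)]   # clips the sequence back to 8 bp
    assert len(sequence) == len(quality)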
diff --git a/sandbox/estimate_optimal_hash.py b/sandbox/estimate_optimal_hash.py
index 63e4932..3f3944e 100755
--- a/sandbox/estimate_optimal_hash.py
+++ b/sandbox/estimate_optimal_hash.py
@@ -30,8 +30,7 @@ from __future__ import print_function
 import argparse
 import khmer, oxli
 from khmer.khmer_args import info
-from oxli.functions import estimate_optimal_with_N_and_M
-from oxli.functions import estimate_optimal_with_N_and_f
+from oxli.functions import optimal_size
 import textwrap
 import sys
 
@@ -69,14 +68,14 @@ def get_parser():
                         + khmer.__version__)
     return parser
 
-    
+
 def main():
     info('estimate_optimal_hash.py', ['counting'])
     args = get_parser().parse_args()
     N = args.N
     if args.M:
         M = args.M
-        result = estimate_optimal_with_N_and_M(N,M)
+        result = optimal_size(N, M=M)
         print("number of estimated distinct k-mers:  ", N, file=sys.stderr)
         print("size of memory available to use:      ", M, file=sys.stderr)
         print("optimal number of hash tables:        ", result.num_htables,
@@ -87,10 +86,10 @@ def main():
               file=sys.stderr)
         print("estimated usage of memory:            ", result.mem_use,
               file=sys.stderr)
-        
+
     elif args.f:
         f = args.f
-        result = estimate_optimal_with_N_and_f(N,f)
+        result = optimal_size(N, f=f)
         print("number of estimated distinct k-mers:  ", N, file=sys.stderr)
         print("desired maximum false positive rate:  ", f, file=sys.stderr)
         print("optimal number of hash tables:        ", result.num_htables,
diff --git a/sandbox/extract-single-partition.py b/sandbox/extract-single-partition.py
index ccc0f28..4e64b3f 100755
--- a/sandbox/extract-single-partition.py
+++ b/sandbox/extract-single-partition.py
@@ -11,7 +11,7 @@ from screed.fasta import fasta_iter
 
 
 def read_partition_file(fp):
-    for n, record in enumerate(fasta_iter(fp, parse_description=False)):
+    for n, record in enumerate(fasta_iter(fp)):
         name = record['name']
         name, partition_id = name.rsplit('\t', 1)
         yield n, name, int(partition_id), record['sequence']
diff --git a/sandbox/optimal_args_hashbits.py b/sandbox/optimal_args_hashbits.py
index 1fba596..9b866e5 100644
--- a/sandbox/optimal_args_hashbits.py
+++ b/sandbox/optimal_args_hashbits.py
@@ -52,7 +52,7 @@ def main():
     print('Counting kmers from sequences in %s' % repr(filenames),
           file=sys.stderr)
 
-    htable = khmer.new_hashbits(args.ksize, args.min_tablesize, args.n_tables)
+    htable = khmer.new_hashbits(args.ksize, args.max_tablesize, args.n_tables)
     target_method = htable.consume_fasta_with_reads_parser
 
     for _, filename in enumerate(filenames):
diff --git a/sandbox/readaligner_pairhmm_train.py b/sandbox/readaligner_pairhmm_train.py
new file mode 100644
index 0000000..0e60e06
--- /dev/null
+++ b/sandbox/readaligner_pairhmm_train.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python
+from __future__ import division
+from __future__ import print_function
+import khmer
+import argparse
+import collections
+from math import log
+import json
+try:
+    import pysam
+except ImportError:
+    pass
+
+cigar_to_state = {0: 'M', 1: 'Ir', 2: 'Ig'}
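+# SAM cigar op codes: 0 = M (aligned), 1 = I (extra bases in the read),
+# 2 = D (bases missing from the read); these appear to map to match,
+# read-insertion ('Ir') and graph-insertion ('Ig') states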
+
+
+def extract_cigar(cigar):
+    ret = []
+    for t, length in cigar:
+        for i in range(length):
+            ret.append(cigar_to_state[t])
+
+    return ret
+
+
+def trusted_str(cov, trusted_cutoff):
+    if cov < trusted_cutoff:
+        return '_u'
+    else:
+        return '_t'
+
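+# e.g. extract_cigar([(0, 3), (1, 2)]) -> ['M', 'M', 'M', 'Ir', 'Ir'],
+# and trusted_str(7, 5) -> '_t', so per-base states look like 'M_t' or
+# 'Ir_u'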
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--trusted-cutoff', type=int, default=5)
+    parser.add_argument(
+        "ht", type=str, help="Counting bloom filter for the reads")
+    parser.add_argument("bam_file", type=str, help="bam read mapping file")
+    parser.add_argument("--json", action='store_true', help="output JSON")
+
+    args = parser.parse_args()
+
+    ht = khmer.load_counting_hash(args.ht)
+    samfile = pysam.Samfile(args.bam_file)
+
+    k = ht.ksize()
+    seq_cnt = 0
+    dropped_seqs = 0
+    base_cnt = {}
+    state_cnts = {}
+    trans_cnts = {}
+
+    total_bases = 0.0
+
+    for rec in samfile:
+        seq = rec.seq
+        cigar = rec.cigar
+
+        seq_cnt += 1
+        if 'N' in seq:
+            dropped_seqs += 1
+            continue
+
+        states = extract_cigar(cigar)
+
+        kmer = seq[:k]
+        state = states[k] + trusted_str(ht.get(kmer), args.trusted_cutoff)
+
+        state_cnts[state] = state_cnts.get(state, 0) + 1
+        base_cnt[kmer[-1]] = base_cnt.get(kmer[-1], 0) + 1
+
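+        # walk the rest of the read one k-mer at a time: each position
+        # contributes a state (M/Ir/Ig plus _t/_u) and a transition
+        # from the previous state, tallied for the pair-HMM parameters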
+        for i in range(1, len(seq) - k - 1):
+            total_bases += 1
+            kmer = seq[i:i + k]
+            cov = ht.get(kmer)
+
+            last_state = state
+            state = states[i] + trusted_str(cov, args.trusted_cutoff)
+
+            trans = last_state + '-' + state
+            trans_cnts[trans] = trans_cnts.get(trans, 0) + 1
+
+            state_cnts[state] = state_cnts.get(state, 0) + 1
+            base_cnt[kmer[-1]] = base_cnt.get(kmer[-1], 0) + 1
+
+    if not args.json:
+        print("kmer size=", k)
+        print("seq count=", seq_cnt, "dropped seqs=", dropped_seqs)
+        print("base counts=", base_cnt)
+        print("state counts=", state_cnts)
+        print("trans counts=", trans_cnts)
+
+    if not args.json:
+        trans_probs = collections.defaultdict(float)
+
+        for trans in sorted(trans_cnts.keys()):
+            start_state = trans.split('-')[0]
+            trans_probs[trans] = trans_cnts[trans] / \
+                float(state_cnts[start_state])
+            print('{0}\t{1:0.7f}'.format(trans, trans_probs[trans]))
+
+        print('static double trans_default[] = { log2({0:0.7f}), '
+              'log2({1:0.7f}), log2({2:0.7f}), log2({3:0.7f}), '
+              'log2({4:0.7f}), log2({5:0.7f}),'.format(
+                  trans_probs['M_t-M_t'], trans_probs['M_t-Ir_t'],
+                  trans_probs['M_t-Ig_t'], trans_probs['M_t-M_u'],
+                  trans_probs['M_t-Ir_u'], trans_probs['M_t-Ig_u']))
+        print('log2({0:0.7f}), log2({1:0.7f}), log2({2:0.7f}), '
+              'log2({3:0.7f}),'.format(
+                  trans_probs['Ir_t-M_t'], trans_probs['Ir_t-Ir_t'],
+                  trans_probs['Ir_t-M_u'], trans_probs['Ir_t-Ir_u']))
+        print('log2({0:0.7f}), log2({1:0.7f}), log2({2:0.7f}), '
+              'log2({3:0.7f}),'.format(
+                  trans_probs['Ig_t-M_t'], trans_probs['Ig_t-Ig_t'],
+                  trans_probs['Ig_t-M_u'], trans_probs['Ig_t-Ig_u']))
+        print('log2({0:0.7f}), log2({1:0.7f}), log2({2:0.7f}), '
+              'log2({3:0.7f}), log2({4:0.7f}), log2({5:0.7f}),'.format(
+                  trans_probs['M_u-M_t'], trans_probs['M_u-Ir_t'],
+                  trans_probs['M_u-Ig_t'], trans_probs['M_u-M_u'],
+                  trans_probs['M_u-Ir_u'], trans_probs['M_u-Ig_u']))
+        print('log2({0:0.7f}), log2({1:0.7f}), log2({2:0.7f}), '
+              'log2({3:0.7f}),'.format(
+                  trans_probs['Ir_u-M_t'], trans_probs['Ir_u-Ir_t'],
+                  trans_probs['Ir_u-M_u'], trans_probs['Ir_u-Ir_u']))
+        print('log2({0:0.7f}), log2({1:0.7f}), log2({2:0.7f}), '
+              'log2({3:0.7f}),'.format(
+                  trans_probs['Ig_u-M_t'], trans_probs['Ig_u-Ig_t'],
+                  trans_probs['Ig_u-M_u'], trans_probs['Ig_u-Ig_u']))
+        print('};')
+    else:
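+        # the fixed scoring_matrix values below appear to be log2
+        # probabilities (log2(0.955) ~ -0.0664, log2(0.04) ~ -4.6439,
+        # log2(0.004) ~ -7.9658, log2(0.001) ~ -9.9658); only the
+        # transition probabilities are re-estimated from the BAM file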
+        params = {'scoring_matrix':
+                  [-0.06642736173897607,
+                   -4.643856189774724,
+                   -7.965784284662087,
+                   -9.965784284662087],
+                  'transition_probabilities': ((
+                      log(trans_cnts['M_t-M_t'] / float(state_cnts['M_t']), 2),
+                      log(trans_cnts['M_t-Ir_t'] /
+                          float(state_cnts['M_t']), 2),
+                      log(trans_cnts['M_t-Ig_t'] /
+                          float(state_cnts['M_t']), 2),
+                      log(trans_cnts['M_t-M_u'] / float(state_cnts['M_t']), 2),
+                      log(trans_cnts['M_t-Ir_u'] /
+                          float(state_cnts['M_t']), 2),
+                      log(trans_cnts['M_t-Ig_u'] /
+                          float(state_cnts['M_t']), 2),
+                  ), (
+                      log(trans_cnts['Ir_t-M_t'] /
+                          float(state_cnts['Ir_t']), 2),
+                      log(trans_cnts['Ir_t-Ir_t'] /
+                          float(state_cnts['Ir_t']), 2),
+                      log(trans_cnts['Ir_t-M_u'] /
+                          float(state_cnts['Ir_t']), 2),
+                      log(trans_cnts['Ir_t-Ir_u'] /
+                          float(state_cnts['Ir_t']), 2),
+                  ), (
+                      log(trans_cnts['Ig_t-M_t'] /
+                          float(state_cnts['Ig_t']), 2),
+                      log(trans_cnts['Ig_t-Ig_t'] /
+                          float(state_cnts['Ig_t']), 2),
+                      log(trans_cnts['Ig_t-M_u'] /
+                          float(state_cnts['Ig_t']), 2),
+                      log(trans_cnts['Ig_t-Ig_u'] /
+                          float(state_cnts['Ig_t']), 2),
+                  ), (
+                      log(trans_cnts['M_u-M_t'] / float(state_cnts['M_u']), 2),
+                      log(trans_cnts['M_u-Ir_t'] /
+                          float(state_cnts['M_u']), 2),
+                      log(trans_cnts['M_u-Ig_t'] /
+                          float(state_cnts['M_u']), 2),
+                      log(trans_cnts['M_u-M_u'] / float(state_cnts['M_u']), 2),
+                      log(trans_cnts['M_u-Ir_u'] /
+                          float(state_cnts['M_u']), 2),
+                      log(trans_cnts['M_u-Ig_u'] /
+                          float(state_cnts['M_u']), 2),
+                  ), (
+                      log(trans_cnts['Ir_u-M_t'] /
+                          float(state_cnts['Ir_u']), 2),
+                      log(trans_cnts['Ir_u-Ir_t'] /
+                          float(state_cnts['Ir_u']), 2),
+                      log(trans_cnts['Ir_u-M_u'] /
+                          float(state_cnts['Ir_u']), 2),
+                      log(trans_cnts['Ir_u-Ir_u'] /
+                          float(state_cnts['Ir_u']), 2),
+                  ), (
+                      log(trans_cnts['Ig_u-M_t'] /
+                          float(state_cnts['Ig_u']), 2),
+                      log(trans_cnts['Ig_u-Ig_t'] /
+                          float(state_cnts['Ig_u']), 2),
+                      log(trans_cnts['Ig_u-M_u'] /
+                          float(state_cnts['Ig_u']), 2),
+                      log(trans_cnts['Ig_u-Ig_u'] /
+                          float(state_cnts['Ig_u']), 2),
+                  )
+                  )
+                  }
+        print(json.dumps(params, sort_keys=True, indent=4,
+                         separators=(',', ': ')))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sandbox/saturate-by-median.py b/sandbox/saturate-by-median.py
index 40aee14..a47cde4 100755
--- a/sandbox/saturate-by-median.py
+++ b/sandbox/saturate-by-median.py
@@ -215,6 +215,8 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
         except IOError as err:
             handle_error(err, input_filename)
             if not args.force:
+                print("NOTE: This can be overridden using the --force"
+                      " argument", file=sys.stderr)
                 print('** Exiting!', file=sys.stderr)
                 sys.exit(1)
             else:
@@ -243,7 +245,7 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
 
     if args.force and len(corrupt_files) > 0:
         print("** WARNING: Finished with errors!", file=sys.stderr)
-        print("** IOErrors occurred in the following files:", file=sys.stderr)
+        print("** I/O Errors occurred in the following files:", file=sys.stderr)
         print("\t", " ".join(corrupt_files), file=sys.stderr)
 
 
diff --git a/sandbox/sweep-files.py b/sandbox/sweep-files.py
index 6b44ea6..e80ca44 100755
--- a/sandbox/sweep-files.py
+++ b/sandbox/sweep-files.py
@@ -103,15 +103,15 @@ def main():
     parser = get_parser()
     args = parser.parse_args()
 
-    if args.min_tablesize < MIN_HSIZE:
-        args.min_tablesize = MIN_HSIZE
+    if args.max_tablesize < MIN_HSIZE:
+        args.max_tablesize = MIN_HSIZE
     if args.ksize < MIN_KSIZE:
         args.ksize = MIN_KSIZE
 
     report_on_config(args, hashtype='nodegraph')
 
     K = args.ksize
-    HT_SIZE = args.min_tablesize
+    HT_SIZE = args.max_tablesize
     N_HT = args.n_tables
 
     traversal_range = args.traversal_range
@@ -137,7 +137,7 @@ def main():
                 ht.consume_sequence_and_tag_with_labels(record.sequence, i)
 
 
-    except IOError as e:
+    except (IOError, OSError) as e:
         print('!! ERROR: !!', e, file=sys.stderr)
         print('...error setting up outputs. exiting...', file=sys.stderr)
 
diff --git a/sandbox/sweep-reads.py b/sandbox/sweep-reads.py
index fbf2ccb..7134fc3 100755
--- a/sandbox/sweep-reads.py
+++ b/sandbox/sweep-reads.py
@@ -121,7 +121,7 @@ class ReadBufferManager(object):
         buf = self.buffers[buf_id]
         try:
             outfp = open(fpath, 'a')
-        except IOError as _:
+        except (IOError, OSError) as _:
             print('!! ERROR: {_} !!'.format(_=_), file=sys.stderr)
             print('*** Failed to open {fn} for \
                                 buffer flush'.format(fn=fpath), file=sys.stderr)
@@ -290,11 +290,11 @@ def main():
 
                     write_record(record, outfp)
 
-            except IOError as e:
+            except (IOError, OSError) as e:
                 print('!! ERROR !!', e, file=sys.stderr)
                 print('...error splitting input. exiting...', file=sys.stderr)
 
-    except IOError as e:
+    except (IOError, OSError) as e:
         print('!! ERROR: !!', e, file=sys.stderr)
         print('...error consuming \
                             {i}. exiting...'.format(i=input_fastp), file=sys.stderr)
@@ -319,7 +319,7 @@ def main():
         file_t = 0.0
         try:
             read_fp = screed.open(read_file)
-        except IOError as error:
+        except (IOError, OSError) as error:
             print('!! ERROR: !!', error, file=sys.stderr)
             print('*** Could not open {fn}, skipping...'.format(
                 fn=read_file), file=sys.stderr)
diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py
index 10d2109..4da22b2 100755
--- a/scripts/abundance-dist-single.py
+++ b/scripts/abundance-dist-single.py
@@ -24,9 +24,8 @@ import threading
 import textwrap
 from khmer import khmer_args
 from khmer.khmer_args import (build_counting_args, add_threading_args,
-                              report_on_config, info)
-from khmer.kfile import (check_input_files, check_space,
-                         check_space_for_hashtable)
+                              report_on_config, info, calculate_tablesize)
+from khmer.kfile import (check_input_files, check_space_for_hashtable)
 
 
 def get_parser():
@@ -59,14 +58,9 @@ def get_parser():
     parser.add_argument('-s', '--squash', dest='squash_output', default=False,
                         action='store_true',
                         help='Overwrite output file if it exists')
-    parser.add_argument('--csv', default=False, action='store_true',
-                        help='Use the CSV format for the histogram. '
-                        'Includes column headers.')
     parser.add_argument('--savetable', default='', metavar="filename",
                         help="Save the k-mer counting table to the specified "
                         "filename.")
-    parser.add_argument('--report-total-kmers', '-t', action='store_true',
-                        help="Prints the total number of k-mers to stderr")
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -78,10 +72,9 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
     report_on_config(args)
 
     check_input_files(args.input_sequence_filename, args.force)
-    check_space([args.input_sequence_filename], args.force)
     if args.savetable:
-        check_space_for_hashtable(args, 'countgraph', args.force)
-
+        tablesize = calculate_tablesize(args, 'countgraph')
+        check_space_for_hashtable(args.savetable, tablesize, args.force)
     if (not args.squash_output and
             os.path.exists(args.output_histogram_filename)):
         print('ERROR: %s exists; not squashing.' %
@@ -89,11 +82,10 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
         sys.exit(1)
     else:
         hist_fp = open(args.output_histogram_filename, 'w')
-        if args.csv:
-            hist_fp_csv = csv.writer(hist_fp)
-            # write headers:
-            hist_fp_csv.writerow(['abundance', 'count', 'cumulative',
-                                  'cumulative_fraction'])
+        hist_fp_csv = csv.writer(hist_fp)
+        # write headers:
+        hist_fp_csv.writerow(['abundance', 'count', 'cumulative',
+                              'cumulative_fraction'])
 
     print('making countgraph', file=sys.stderr)
     counting_hash = khmer_args.create_countgraph(args, multiplier=1.1)
@@ -124,9 +116,8 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
     for thread in threads:
         thread.join()
 
-    if args.report_total_kmers:
-        print('Total number of unique k-mers: {0}'.format(
-            counting_hash.n_unique_kmers()), file=sys.stderr)
+    print('Total number of unique k-mers: {0}'.format(
+        counting_hash.n_unique_kmers()), file=sys.stderr)
 
     abundance_lists = []
 
@@ -176,10 +167,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
         sofar += i
         frac = sofar / float(total)
 
-        if args.csv:
-            hist_fp_csv.writerow([_, i, sofar, round(frac, 3)])
-        else:
-            print(_, i, sofar, round(frac, 3), file=hist_fp)
+        hist_fp_csv.writerow([_, i, sofar, round(frac, 3)])
 
         if sofar == total:
             break
diff --git a/scripts/abundance-dist.py b/scripts/abundance-dist.py
index 7661ec6..1e96c1e 100755
--- a/scripts/abundance-dist.py
+++ b/scripts/abundance-dist.py
@@ -5,7 +5,7 @@
 # the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
-# pylint: disable=missing-docstring,invalid-name
+# pylint: disable=missing-docstring
 """
 Produce the k-mer abundance distribution for the given file.
 
@@ -22,7 +22,6 @@ import argparse
 import os
 from khmer.kfile import check_input_files
 from khmer.khmer_args import info
-from khmer.utils import write_record
 
 
 def get_parser():
@@ -47,9 +46,6 @@ def get_parser():
     parser.add_argument('-b', '--no-bigcount', dest='bigcount', default=True,
                         action='store_false',
                         help='Do not count k-mers past 255')
-    parser.add_argument('--csv', default=False, action='store_true',
-                        help='Use the CSV format for the histogram. '
-                        'Includes column headers.')
     parser.add_argument('--version', action='version', version='%(prog)s ' +
                         khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
@@ -61,10 +57,11 @@ def get_parser():
 def main():
     info('abundance-dist.py', ['counting'])
     args = get_parser().parse_args()
+
     infiles = [args.input_counting_table_filename,
                args.input_sequence_filename]
     for infile in infiles:
-        check_input_files(infile, args.force)
+        check_input_files(infile, False)
 
     print('hashtable from', args.input_counting_table_filename,
           file=sys.stderr)
@@ -88,7 +85,9 @@ def main():
     print('HT sizes:', hashsizes, file=sys.stderr)
     print('outputting to', args.output_histogram_filename, file=sys.stderr)
 
-    if os.path.exists(args.output_histogram_filename):
+    if args.output_histogram_filename in ('-', '/dev/stdout'):
+        pass
+    elif os.path.exists(args.output_histogram_filename):
         if not args.squash_output:
             print('ERROR: %s exists; not squashing.' %
                   args.output_histogram_filename,
@@ -110,12 +109,14 @@ def main():
               file=sys.stderr)
         sys.exit(1)
 
-    hash_fp = open(args.output_histogram_filename, 'w')
-    if args.csv:
-        hash_fp_csv = csv.writer(hash_fp)
-        # write headers:
-        hash_fp_csv.writerow(['abundance', 'count', 'cumulative',
-                              'cumulative_fraction'])
+    if args.output_histogram_filename in ('-', '/dev/stdout'):
+        hash_fp = sys.stdout
+    else:
+        hash_fp = open(args.output_histogram_filename, 'w')
+    hash_fp_csv = csv.writer(hash_fp)
+    # write headers:
+    hash_fp_csv.writerow(['abundance', 'count', 'cumulative',
+                          'cumulative_fraction'])
 
     sofar = 0
     for _, i in enumerate(abundances):
@@ -125,10 +126,7 @@ def main():
         sofar += i
         frac = sofar / float(total)
 
-        if args.csv:
-            hash_fp_csv.writerow([_, i, sofar, round(frac, 3)])
-        else:
-            print(_, i, sofar, round(frac, 3), file=hash_fp)
+        hash_fp_csv.writerow([_, i, sofar, round(frac, 3)])
 
         if sofar == total:
             break
diff --git a/scripts/count-median.py b/scripts/count-median.py
index 19e6473..7ca052c 100755
--- a/scripts/count-median.py
+++ b/scripts/count-median.py
@@ -40,11 +40,7 @@ def get_parser():
     to estimate expression levels (mRNAseq) or coverage (genomic/metagenomic).
 
     The output file contains sequence id, median, average, stddev, and
-    seq length; fields are separated by spaces. For khmer 1.x
-    count-median.py will split sequence names at the first space which
-    means that some sequence formats (e.g. paired FASTQ in Casava 1.8
-    format) will yield uninformative names.  Use :option:`--csv` to
-    fix this behavior.
+    seq length, in comma-separated value (CSV) format.
 
     Example::
 
@@ -61,14 +57,12 @@ def get_parser():
     parser.add_argument('input', metavar='input_sequence_filename',
                         help='input FAST[AQ] sequence filename')
     parser.add_argument('output', metavar='output_summary_filename',
-                        help='output summary filename')
+                        help='output summary filename',
+                        type=argparse.FileType('w'))
     parser.add_argument('--version', action='version', version='%(prog)s ' +
                         khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
-    parser.add_argument('--csv', default=False, action='store_true',
-                        help="Use the CSV format for the histogram."
-                        "Includes column headers.")
     return parser
 
 
@@ -78,7 +72,8 @@ def main():
 
     htfile = args.ctfile
     input_filename = args.input
-    output_filename = args.output
+    output = args.output
+    output_filename = output.name
 
     infiles = [htfile, input_filename]
     for infile in infiles:
@@ -89,21 +84,13 @@ def main():
     print('loading k-mer counting table from', htfile, file=sys.stderr)
     htable = khmer.load_counting_hash(htfile)
     ksize = htable.ksize()
-
     print('writing to', output_filename, file=sys.stderr)
-    output = open(output_filename, 'w')
-
-    if args.csv:
-        output = csv.writer(output)
-        # write headers:
-        output.writerow(['name', 'median', 'average', 'stddev', 'seqlen'])
 
-    parse_description = True            # @legacy behavior: split seq headers
-    if args.csv:
-        parse_description = False       # only enable if we're doing csv out
+    output = csv.writer(output)
+    # write headers:
+    output.writerow(['name', 'median', 'average', 'stddev', 'seqlen'])
 
-    for record in screed.open(input_filename,
-                              parse_description=parse_description):
+    for record in screed.open(input_filename):
         seq = record.sequence.upper()
         if 'N' in seq:
             seq = seq.replace('N', 'A')
@@ -111,10 +98,7 @@ def main():
         if ksize <= len(seq):
             medn, ave, stdev = htable.get_median_count(seq)
             ave, stdev = [round(x, 9) for x in (ave, stdev)]
-            if args.csv:
-                output.writerow([record.name, medn, ave, stdev, len(seq)])
-            else:
-                print(record.name, medn, ave, stdev, len(seq), file=output)
+            output.writerow([record.name, medn, ave, stdev, len(seq)])
 
 if __name__ == '__main__':
     main()
diff --git a/scripts/count-overlap.py b/scripts/count-overlap.py
index a8c715d..51ffbf3 100755
--- a/scripts/count-overlap.py
+++ b/scripts/count-overlap.py
@@ -24,7 +24,7 @@ import csv
 import khmer
 import textwrap
 from khmer import khmer_args
-from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_input_files
 from khmer.khmer_args import (build_hashbits_args, report_on_config, info)
 
 
@@ -43,10 +43,6 @@ def get_parser():
                         help="input sequence filename")
     parser.add_argument('report_filename', metavar='output_report_filename',
                         help='output report filename')
-    parser.add_argument('--csv', default=False, action='store_true',
-                        help='Use the CSV format for the curve output '
-                        'in ${output_report_filename}.curve, '
-                        'including column headers.')
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -60,18 +56,15 @@ def main():
     for infile in [args.ptfile, args.fafile]:
         check_input_files(infile, args.force)
 
-    check_space([args.ptfile, args.fafile], args.force)
-
     print('loading k-mer presence table from', args.ptfile, file=sys.stderr)
     ht1 = khmer.load_hashbits(args.ptfile)
     kmer_size = ht1.ksize()
 
     output = open(args.report_filename, 'w')
     f_curve_obj = open(args.report_filename + '.curve', 'w')
-    if args.csv:
-        f_curve_obj_csv = csv.writer(f_curve_obj)
-        # write headers:
-        f_curve_obj_csv.writerow(['input_seq', 'overlap_kmer'])
+    f_curve_obj_csv = csv.writer(f_curve_obj)
+    # write headers:
+    f_curve_obj_csv.writerow(['input_seq', 'overlap_kmer'])
 
     ht2 = khmer_args.create_nodegraph(args, ksize=kmer_size)
 
@@ -88,10 +81,7 @@ dataset2: %s
     output.write(printout1)
 
     for i in range(100):
-        if args.csv:
-            f_curve_obj_csv.writerow([list_curve[100 + i], list_curve[i]])
-        else:
-            print(list_curve[100 + i], list_curve[i], file=f_curve_obj)
+        f_curve_obj_csv.writerow([list_curve[100 + i], list_curve[i]])
 
     print('wrote to: ' + args.report_filename, file=sys.stderr)
 
diff --git a/scripts/extract-long-sequences.py b/scripts/extract-long-sequences.py
index 7155070..0a83bed 100755
--- a/scripts/extract-long-sequences.py
+++ b/scripts/extract-long-sequences.py
@@ -34,7 +34,8 @@ def get_parser():
     parser.add_argument('input_filenames', help='Input FAST[AQ]'
                         ' sequence filename.', nargs='+')
     parser.add_argument('-o', '--output', help='The name of the output'
-                        ' sequence file.', default="/dev/stdout")
+                        ' sequence file.', default=sys.stdout,
+                        metavar='output', type=argparse.FileType('w'))
     parser.add_argument('-l', '--length', help='The minimum length of'
                         ' the sequence file.',
                         type=int, default=200)
@@ -43,12 +44,12 @@ def get_parser():
 
 def main():
     args = get_parser().parse_args()
-    outfp = open(args.output, 'w')
+    outfp = args.output
     for filename in args.input_filenames:
-        for record in screed.open(filename, parse_description=False):
+        for record in screed.open(filename):
             if len(record['sequence']) >= args.length:
                 write_record(record, outfp)
-    print('wrote to: ' + args.output, file=sys.stderr)
+    print('wrote to: ' + outfp.name, file=sys.stderr)
 
 if __name__ == '__main__':
     main()
diff --git a/scripts/extract-paired-reads.py b/scripts/extract-paired-reads.py
index ea8bebf..34f8f21 100755
--- a/scripts/extract-paired-reads.py
+++ b/scripts/extract-paired-reads.py
@@ -89,7 +89,7 @@ def main():
     check_space([infile], args.force)
 
     # decide where to put output files - specific directory? or just default?
-    if infile == '/dev/stdin' or infile == '-':
+    if infile in ('/dev/stdin', '-'):
         if not (args.output_paired and args.output_single):
             print("Accepting input from stdin; output filenames must be "
                   "provided.", file=sys.stderr)
@@ -124,7 +124,7 @@ def main():
     n_pe = 0
     n_se = 0
 
-    screed_iter = screed.open(infile, parse_description=False)
+    screed_iter = screed.open(infile)
     for index, is_pair, read1, read2 in broken_paired_reader(screed_iter):
         if index % 100000 == 0 and index > 0:
             print('...', index, file=sys.stderr)
diff --git a/scripts/extract-partitions.py b/scripts/extract-partitions.py
index 777db38..b4ee857 100755
--- a/scripts/extract-partitions.py
+++ b/scripts/extract-partitions.py
@@ -34,8 +34,7 @@ DEFAULT_THRESHOLD = 5
 
 
 def read_partition_file(filename):
-    for record_index, record in enumerate(screed.open
-                                          (filename, parse_description=False)):
+    for record_index, record in enumerate(screed.open(filename)):
         _, partition_id = record.name.rsplit('\t', 1)
         yield record_index, record, int(partition_id)
 
diff --git a/scripts/fastq-to-fasta.py b/scripts/fastq-to-fasta.py
index ef21cda..0dc5831 100755
--- a/scripts/fastq-to-fasta.py
+++ b/scripts/fastq-to-fasta.py
@@ -33,8 +33,8 @@ def get_parser():
                         type=argparse.FileType('w'),
                         default=sys.stdout)
     parser.add_argument('-n', '--n_keep', default=False, action='store_true',
-                        help='Option to drop reads containing \'N\'s in ' +
-                        'input_sequence file.')
+                        help='Option to keep reads containing \'N\'s in ' +
+                        'input_sequence file. Default is to drop reads.')
     return parser
 
 
@@ -43,8 +43,7 @@ def main():
     print('fastq from', args.input_sequence, file=sys.stderr)
 
     n_count = 0
-    for n, record in enumerate(screed.open(args.input_sequence,
-                                           parse_description=False)):
+    for n, record in enumerate(screed.open(args.input_sequence)):
         if n % 10000 == 0:
             print('...', n, file=sys.stderr)
 
diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py
index b22a494..0aaf76b 100755
--- a/scripts/filter-abund-single.py
+++ b/scripts/filter-abund-single.py
@@ -26,7 +26,7 @@ import textwrap
 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
 from khmer import khmer_args
 from khmer.khmer_args import (build_counting_args, report_on_config,
-                              add_threading_args, info)
+                              add_threading_args, info, calculate_tablesize)
 from khmer.kfile import (check_input_files, check_space,
                          check_space_for_hashtable)
 #
@@ -58,8 +58,6 @@ def get_parser():
                         "k-mer counting table to")
     parser.add_argument('datafile', metavar='input_sequence_filename',
                         help="FAST[AQ] sequence file to trim")
-    parser.add_argument('--report-total-kmers', '-t', action='store_true',
-                        help="Prints the total number of k-mers to stderr")
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -71,7 +69,9 @@ def main():
     check_input_files(args.datafile, args.force)
     check_space([args.datafile], args.force)
     if args.savetable:
-        check_space_for_hashtable(args, 'countgraph', args.force)
+        tablesize = calculate_tablesize(args, 'countgraph')
+        check_space_for_hashtable(args.savetable, tablesize, args.force)
+
     report_on_config(args)
 
     print('making countgraph', file=sys.stderr)
@@ -93,9 +93,8 @@ def main():
     for _ in threads:
         _.join()
 
-    if args.report_total_kmers:
-        print('Total number of unique k-mers: {0}'.format(
-            htable.n_unique_kmers()), file=sys.stderr)
+    print('Total number of unique k-mers: {0}'.format(
+        htable.n_unique_kmers()), file=sys.stderr)
 
     fp_rate = khmer.calc_expected_collisions(htable, args.force)
     print('fp rate estimated to be %1.3f' % fp_rate, file=sys.stderr)
diff --git a/scripts/filter-abund.py b/scripts/filter-abund.py
index e994c60..a5fc98f 100755
--- a/scripts/filter-abund.py
+++ b/scripts/filter-abund.py
@@ -63,8 +63,9 @@ def get_parser():
                         help='Base the variable-coverage cutoff on this median'
                         ' k-mer abundance.',
                         default=DEFAULT_NORMALIZE_LIMIT)
-    parser.add_argument('-o', '--out', dest='single_output_filename',
-                        default='', metavar="optional_output_filename",
+    parser.add_argument('-o', '--out', dest='single_output_file',
+                        type=argparse.FileType('w'),
+                        metavar="optional_output_filename",
                         help='Output the trimmed sequences into a single file '
                         'with the given filename instead of creating a new '
                         'file for each input file.')
@@ -81,6 +82,12 @@ def main():
 
     check_input_files(args.input_table, args.force)
     infiles = args.input_filename
+    if ('-' in infiles or '/dev/stdin' in infiles) and not \
+       args.single_output_file:
+        print("Accepting input from stdin; output filename must "
+              "be provided with -o.", file=sys.stderr)
+        sys.exit(1)
+
     for filename in infiles:
         check_input_files(filename, args.force)
 
@@ -116,9 +123,9 @@ def main():
     # the filtering loop
     for infile in infiles:
         print('filtering', infile, file=sys.stderr)
-        if args.single_output_filename != '':
-            outfile = args.single_output_filename
-            outfp = open(outfile, 'a')
+        if args.single_output_file:
+            outfile = args.single_output_file.name
+            outfp = args.single_output_file
         else:
             outfile = os.path.basename(infile) + '.abundfilt'
             outfp = open(outfile, 'w')
@@ -128,5 +135,6 @@ def main():
 
         print('output in', outfile, file=sys.stderr)
 
+
 if __name__ == '__main__':
     main()
diff --git a/scripts/interleave-reads.py b/scripts/interleave-reads.py
index 94d5776..ae7d598 100755
--- a/scripts/interleave-reads.py
+++ b/scripts/interleave-reads.py
@@ -18,9 +18,6 @@ By default, output is sent to stdout; or use -o. Use '-h' for parameter help.
 """
 from __future__ import print_function
 
-# TODO: take fa as well?
-#      support gzip option?
-
 import screed
 import sys
 import os
@@ -56,7 +53,8 @@ def get_parser():
         epilog=textwrap.dedent(epilog),
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
-    parser.add_argument('infiles', nargs='+')
+    parser.add_argument('left')
+    parser.add_argument('right')
     parser.add_argument('-o', '--output', metavar="filename",
                         type=argparse.FileType('w'),
                         default=sys.stdout)
@@ -71,23 +69,12 @@ def main():
     info('interleave-reads.py')
     args = get_parser().parse_args()
 
-    for _ in args.infiles:
-        check_input_files(_, args.force)
-
-    check_space(args.infiles, args.force)
-
-    s1_file = args.infiles[0]
-    if len(args.infiles) == 2:
-        s2_file = args.infiles[1]
-    else:
-        s2_file = s1_file.replace('_R1_', '_R2_')
-        if s1_file == s2_file:
-            print(("ERROR: given only one filename, that "
-                   "doesn't contain _R1_. Exiting."), file=sys.stderr)
-            sys.exit(1)
+    check_input_files(args.left, args.force)
+    check_input_files(args.right, args.force)
+    check_space([args.left, args.right], args.force)
 
-        print(("given only one file; "
-               "guessing that R2 file is %s" % s2_file), file=sys.stderr)
+    s1_file = args.left
+    s2_file = args.right
 
     fail = False
     if not os.path.exists(s1_file):
@@ -104,8 +91,8 @@ def main():
     print("Interleaving:\n\t%s\n\t%s" % (s1_file, s2_file), file=sys.stderr)
 
     counter = 0
-    screed_iter_1 = screed.open(s1_file, parse_description=False)
-    screed_iter_2 = screed.open(s2_file, parse_description=False)
+    screed_iter_1 = screed.open(s1_file)
+    screed_iter_2 = screed.open(s2_file)
     for read1, read2 in zip_longest(screed_iter_1, screed_iter_2):
         if read1 is None or read2 is None:
             print(("ERROR: Input files contain different number"
diff --git a/scripts/load-graph.py b/scripts/load-graph.py
index 999403e..0b8c334 100755
--- a/scripts/load-graph.py
+++ b/scripts/load-graph.py
@@ -5,7 +5,7 @@
 # the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
-# pylint: disable=invalid-name,missing-docstring
+# pylint: disable=missing-docstring
 """
 Build a graph from the given sequences, save in <ptname>.
 
@@ -13,16 +13,10 @@ Build a graph from the given sequences, save in <ptname>.
 
 Use '-h' for parameter help.
 """
-from __future__ import print_function, unicode_literals
 
 import sys
-import threading
 
-import khmer
 from khmer.khmer_args import build_hashbits_args
-from khmer.khmer_args import (report_on_config, info, add_threading_args)
-from khmer.kfile import check_input_files, check_space
-from khmer.kfile import check_space_for_hashtable
 from oxli import build_graph
 
 
@@ -35,9 +29,7 @@ def get_parser():
 
 
 if __name__ == '__main__':
-    parser = get_parser()
-    args = parser.parse_args()
-    build_graph.main(args)
+    build_graph.main(get_parser().parse_args())
     sys.exit(0)
 
 # vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py
index f907c36..287d7de 100755
--- a/scripts/load-into-counting.py
+++ b/scripts/load-into-counting.py
@@ -22,9 +22,9 @@ import textwrap
 import khmer
 from khmer import khmer_args
 from khmer.khmer_args import build_counting_args, report_on_config, info,\
-    add_threading_args
+    add_threading_args, calculate_tablesize
 from khmer.kfile import check_file_writable
-from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_input_files
 from khmer.kfile import check_space_for_hashtable
 
 
@@ -64,8 +64,6 @@ def get_parser():
                         metavar="FORMAT", choices=[str('json'), str('tsv')],
                         help="What format should the machine readable run "
                         "summary be in? (json or tsv, disabled by default)")
-    parser.add_argument('--report-total-kmers', '-t', action='store_true',
-                        help="Prints the total number of k-mers to stderr")
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -84,8 +82,9 @@ def main():
     for name in args.input_sequence_filename:
         check_input_files(name, args.force)
 
-    check_space(args.input_sequence_filename, args.force)
-    check_space_for_hashtable(args, 'countgraph', args.force)
+    tablesize = calculate_tablesize(args, 'countgraph')
+    check_space_for_hashtable(args.output_countingtable_filename, tablesize,
+                              args.force)
 
     check_file_writable(base)
     check_file_writable(base + ".info")
@@ -124,7 +123,8 @@ def main():
             thread.join()
 
         if index > 0 and index % 10 == 0:
-            check_space_for_hashtable(args, 'countgraph', args.force)
+            tablesize = calculate_tablesize(args, 'countgraph')
+            check_space_for_hashtable(base, tablesize, args.force)
             print('mid-save', base, file=sys.stderr)
 
             htable.save(base)
@@ -133,10 +133,9 @@ def main():
         total_num_reads += rparser.num_reads
 
     n_kmers = htable.n_unique_kmers()
-    if args.report_total_kmers:
-        print('Total number of unique k-mers:', n_kmers, file=sys.stderr)
-        with open(base + '.info', 'a') as info_fp:
-            print('Total number of unique k-mers:', n_kmers, file=info_fp)
+    print('Total number of unique k-mers:', n_kmers, file=sys.stderr)
+    with open(base + '.info', 'a') as info_fp:
+        print('Total number of unique k-mers:', n_kmers, file=info_fp)
 
     print('saving', base, file=sys.stderr)
     htable.save(base)
diff --git a/scripts/make-initial-stoptags.py b/scripts/make-initial-stoptags.py
index 29a08ef..e99a690 100755
--- a/scripts/make-initial-stoptags.py
+++ b/scripts/make-initial-stoptags.py
@@ -18,7 +18,7 @@ import textwrap
 import khmer
 from khmer import khmer_args
 from khmer.khmer_args import (build_counting_args, info)
-from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_input_files
 
 DEFAULT_SUBSET_SIZE = int(1e4)
 DEFAULT_COUNTING_HT_SIZE = 3e6                # number of bytes
@@ -83,8 +83,6 @@ def main():
     for _ in infiles:
         check_input_files(_, args.force)
 
-    check_space(infiles, args.force)
-
     print('loading htable %s.pt' % graphbase, file=sys.stderr)
     htable = khmer.load_hashbits(graphbase + '.pt')
 
diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py
index 3bb2ba7..68e25b1 100755
--- a/scripts/normalize-by-median.py
+++ b/scripts/normalize-by-median.py
@@ -5,7 +5,7 @@
 # the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
-# pylint: disable=invalid-name,missing-docstring
+# pylint: disable=missing-docstring
 """
 Eliminate surplus reads.
 
@@ -26,72 +26,111 @@ import khmer
 import textwrap
 from khmer import khmer_args
 from contextlib import contextmanager
-
+from oxli import functions as oxutils
 from khmer.khmer_args import (build_counting_args, add_loadhash_args,
-                              report_on_config, info)
+                              report_on_config, info, calculate_tablesize)
 import argparse
 from khmer.kfile import (check_space, check_space_for_hashtable,
                          check_valid_file_exists)
-from khmer.utils import write_record, check_is_pair, broken_paired_reader
+from khmer.utils import write_record, broken_paired_reader
 
 
 DEFAULT_DESIRED_COVERAGE = 20
 
 
-def WithDiagnostics(ifilename, norm, reader, fp):
-    """
-    Generator/context manager to do boilerplate output of statistics using a
-    Normalizer object.
+class WithDiagnostics(object):
     """
+    Generator/context manager to do boilerplate output of statistics.
 
-    index = 0
+    Uses a Normalizer object.
+    """
 
-    # per read diagnostic output
-    for index, record in enumerate(norm(reader)):
+    def __init__(self, norm, report_fp=None, report_frequency=100000):
+        self.norm = norm
+        self.report_fp = report_fp
+        if report_fp:
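+            # one CSV header row for the -R/--report file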
+            report_fp.write('total,kept,f_kept\n')
 
-        if norm.total % 100000 == 0:
-            print('... kept {kept} of {total} or {perc:2}% so far'
-                  .format(kept=norm.total - norm.discarded,
-                          total=norm.total,
-                          perc=int(100. - norm.discarded /
-                                   float(norm.total) * 100.)),
-                  file=sys.stderr)
+        self.total = 0
+        self.kept = 0
+
+        self.report_frequency = report_frequency
+        self.next_report_at = self.report_frequency
+        self.last_report_at = self.report_frequency
+
+    def __call__(self, reader, ifilename):
+        norm = self.norm
+        report_fp = self.report_fp
+
+        reads_start = self.total
+        total = self.total
+        kept = self.kept
+
+        try:
+            for _, is_paired, read0, read1 in reader:
+                if is_paired:
+                    total += 2
+                else:
+                    total += 1
+
+                # do diginorm
+                for record in norm(is_paired, read0, read1):
+                    kept += 1
+                    yield record
 
-            print('... in file ' + ifilename, file=sys.stderr)
+                # report!
+                if total >= self.next_report_at:
+                    self.next_report_at += self.report_frequency
+                    self.last_report_at = total
+
+                    perc_kept = kept / float(total)
+
+                    print('... kept {kept} of {tot} or {perc_kept:.1%} so far'
+                          .format(kept=kept, tot=total, perc_kept=perc_kept),
+                          file=sys.stderr)
+                    print('... in file ' + ifilename, file=sys.stderr)
+
+                    if report_fp:
+                        print("{total},{kept},{f_kept:.4}"
+                              .format(total=total, f_kept=perc_kept,
+                                      kept=kept),
+                              file=report_fp)
+                        report_fp.flush()
+        finally:
+            self.total = total
+            self.kept = kept
+
+        # per file diagnostic output
+        if total == reads_start:
+            print('SKIPPED empty file ' + ifilename, file=sys.stderr)
+        else:
+            perc_kept = kept / float(total)
 
-        yield record
+            print('DONE with {inp}; kept {kept} of {total} or {perc_kept:.1%}'
+                  .format(inp=ifilename, kept=kept, total=total,
+                          perc_kept=perc_kept),
+                  file=sys.stderr)
 
-    # per file diagnostic output
-    if norm.total == 0:
-        print('SKIPPED empty file ' + ifilename, file=sys.stderr)
-    else:
-        print('DONE with {inp}; kept {kept} of {total} or {perc:2}%'
-              .format(inp=ifilename, kept=norm.total - norm.discarded,
-                      total=norm.total, perc=int(100. - norm.discarded /
-                                                 float(norm.total) * 100.)),
-              file=sys.stderr)
+        # make sure there's at least one report per file, at the end of each
+        # file.
+        if report_fp and total != self.last_report_at:
+            perc_kept = kept / float(total)
 
-    if fp:
-        print("{total} {kept} {discarded}"
-              .format(total=norm.total, kept=norm.total - norm.discarded,
-                      discarded=1. - (norm.discarded / float(norm.total))),
-              file=fp)
-        fp.flush()
+            print("{total},{kept},{f_kept:.4}"
+                  .format(total=total, f_kept=perc_kept, kept=kept),
+                  file=report_fp)
+            report_fp.flush()
 
 
 class Normalizer(object):
-    """
-    Digital normalization algorithm.
-    """
+
+    """Digital normalization algorithm."""
 
     def __init__(self, desired_coverage, htable):
         self.htable = htable
         self.desired_coverage = desired_coverage
 
-        self.total = 0
-        self.discarded = 0
-
-    def __call__(self, reader):
+    def __call__(self, is_paired, read0, read1):
         """
         Actually does digital normalization - the core algorithm.
 
@@ -101,44 +140,33 @@ class Normalizer(object):
         * if any read's median k-mer count is below desired coverage, keep all;
         * consume and yield kept reads.
         """
-
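+        # e.g. with desired_coverage=20: a pair where both reads already
+        # have median k-mer count >= 20 is dropped; otherwise both reads
+        # are consumed into the table and yielded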
         desired_coverage = self.desired_coverage
 
-        for index, is_paired, read0, read1 in reader:
-            passed_filter = False
+        passed_filter = False
 
-            self.total += 1
+        batch = []
+        batch.append(read0)
+        if read1 is not None:
+            batch.append(read1)
 
-            if is_paired:
-                self.total += 1
-
-            batch = []
-            batch.append(read0)
-            if read1 is not None:
-                batch.append(read1)
+        for record in batch:
+            seq = record.sequence.replace('N', 'A')
+            if not self.htable.median_at_least(seq, desired_coverage):
+                passed_filter = True
 
+        if passed_filter:
             for record in batch:
                 seq = record.sequence.replace('N', 'A')
-                if not self.htable.median_at_least(seq, desired_coverage):
-                    passed_filter = True
-
-            if passed_filter:
-                for record in batch:
-                    seq = record.sequence.replace('N', 'A')
-                    self.htable.consume(seq)
-                    yield record
-            else:
-                self.discarded += len(batch)
+                self.htable.consume(seq)
+                yield record
 
 
 @contextmanager
-def CatchIOErrors(ifile, out, single_out, force, corrupt_files):
-    """
-    Context manager to do boilerplate handling of IOErrors.
-    """
+def catch_io_errors(ifile, out, single_out, force, corrupt_files):
+    """Context manager to do boilerplate handling of IOErrors."""
     try:
         yield
-    except (IOError, ValueError) as error:
+    except (IOError, OSError, ValueError) as error:
         print('** ERROR: ' + str(error), file=sys.stderr)
         print('** Failed on {name}: '.format(name=ifile), file=sys.stderr)
         if not single_out:
@@ -172,10 +200,7 @@ def get_parser():
 
     With :option:`-s`/:option:`--savetable`, the k-mer counting table
     will be saved to the specified file after all sequences have been
-    processed. With :option:`-d`, the k-mer counting table will be
-    saved every d files for multifile runs; if :option:`-s` is set,
-    the specified name will be used, and if not, the name `backup.ct`
-    will be used.  :option:`-l`/:option:`--loadtable` will load the
+    processed. :option:`-l`/:option:`--loadtable` will load the
     specified k-mer counting table before processing the specified
     files.  Note that these tables are in the same format as those
     produced by :program:`load-into-counting.py` and consumed by
@@ -225,10 +250,13 @@ def get_parser():
                         help='save the k-mer counting table to disk after '
                         'all reads are loaded.')
     parser.add_argument('-R', '--report',
-                        metavar='filename', type=argparse.FileType('w'))
+                        metavar='report_filename', type=argparse.FileType('w'))
+    parser.add_argument('--report-frequency',
+                        metavar='report_frequency', type=int,
+                        default=100000)
     parser.add_argument('-f', '--force', dest='force',
-                        help='continue on next file if read errors are \
-                         encountered', action='store_true')
+                        help='continue past file reading errors',
+                        action='store_true')
     parser.add_argument('-o', '--out', metavar="filename",
                         dest='single_output_file',
                         type=argparse.FileType('w'),
@@ -243,6 +271,7 @@ def get_parser():
 
 
 def main():  # pylint: disable=too-many-branches,too-many-statements
+
     info('normalize-by-median.py', ['diginorm'])
     args = get_parser().parse_args()
 
@@ -251,6 +280,9 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
     report_fp = args.report
     force_single = args.force_single
 
+    # if optimization args are given, do optimization
+    args = oxutils.do_sanity_checking(args, 0.1)
+
     # check for similar filenames
     # if we're using a single output file only check for identical filenames
     # otherwise, check for identical BASE names as well.
@@ -274,28 +306,32 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
     check_valid_file_exists(args.input_filenames)
     check_space(args.input_filenames, args.force)
     if args.savetable:
-        check_space_for_hashtable(args, 'countgraph', args.force)
+        tablesize = calculate_tablesize(args, 'countgraph')
+        check_space_for_hashtable(args.savetable, tablesize, args.force)
 
     # load or create counting table.
     if args.loadtable:
         print('loading k-mer counting table from ' + args.loadtable,
               file=sys.stderr)
         htable = khmer.load_counting_hash(args.loadtable)
+        if args.unique_kmers != 0:
+            print('Warning: You have specified a number of unique k-mers'
+                  ' but are loading a pre-created counting table, so'
+                  ' argument optimization will NOT be done.', file=sys.stderr)
     else:
         print('making countgraph', file=sys.stderr)
         htable = khmer_args.create_countgraph(args)
 
-    input_filename = None
-
     # create an object to handle diginorm of all files
     norm = Normalizer(args.cutoff, htable)
+    with_diagnostics = WithDiagnostics(norm, report_fp, args.report_frequency)
 
     # make a list of all filenames and if they're paired or not;
     # if we don't know if they're paired, default to allowing but not
     # forcing pairing.
     files = []
-    for e in filenames:
-        files.append([e, args.paired])
+    for element in filenames:
+        files.append([element, args.paired])
     if args.unpaired_reads:
         files.append([args.unpaired_reads, False])
 
@@ -309,6 +345,11 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
         else:
             output_name = args.single_output_file.name
         outfp = args.single_output_file
+    else:
+        if '-' in filenames or '/dev/stdin' in filenames:
+            print("Accepting input from stdin; output filename must "
+                  "be provided with '-o'.", file=sys.stderr)
+            sys.exit(1)
 
     #
     # main loop: iterate over all files given, do diginorm.
@@ -320,16 +361,16 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
             outfp = open(output_name, 'w')
 
         # failsafe context manager in case an input file breaks
-        with CatchIOErrors(filename, outfp, args.single_output_file,
-                           args.force, corrupt_files):
+        with catch_io_errors(filename, outfp, args.single_output_file,
+                             args.force, corrupt_files):
 
-            screed_iter = screed.open(filename, parse_description=False)
+            screed_iter = screed.open(filename)
             reader = broken_paired_reader(screed_iter, min_length=args.ksize,
                                           force_single=force_single,
                                           require_paired=require_paired)
 
             # actually do diginorm
-            for record in WithDiagnostics(filename, norm, reader, report_fp):
+            for record in with_diagnostics(reader, filename):
                 if record is not None:
                     write_record(record, outfp)
 
@@ -348,7 +389,7 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
         htable.save(args.savetable)
 
     fp_rate = \
-        khmer.calc_expected_collisions(htable, args.force, max_false_pos=.8)
+        khmer.calc_expected_collisions(htable, False, max_false_pos=.8)
     # for max_false_pos see Zhang et al., http://arxiv.org/abs/1309.2975
 
     print('fp rate estimated to be {fpr:1.3f}'.format(fpr=fp_rate),
@@ -356,7 +397,8 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
 
     if args.force and len(corrupt_files) > 0:
         print("** WARNING: Finished with errors!", file=sys.stderr)
-        print("** IOErrors occurred in the following files:", file=sys.stderr)
+        print("** I/O Errors occurred in the following files:",
+              file=sys.stderr)
         print("\t", " ".join(corrupt_files), file=sys.stderr)
 
 
diff --git a/scripts/oxli b/scripts/oxli
new file mode 100755
index 0000000..74d0bf5
--- /dev/null
+++ b/scripts/oxli
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+#
+# This file is a part of khmer, http://github.com/ged-lab/khmer/, and is
+# Copyright (C) The Regents of the University of California, 2015. It is
+# licensed under the three-clause BSD license; see doc/LICENSE.txt.
+# Contact: khmer-project at idyll.org
+#
+
+__requires__ = 'khmer'
+import sys
+from pkg_resources import load_entry_point
+
+if __name__ == '__main__':
+    sys.exit(
+        load_entry_point('khmer', 'console_scripts', 'oxli')()
+    )
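
The wrapper above never imports a khmer module directly:
load_entry_point('khmer', 'console_scripts', 'oxli') asks setuptools for
whatever function khmer's installed metadata registers under the
console_scripts group, so the script keeps working even if the implementing
module moves. A minimal sketch of driving that entry point programmatically,
with an assumed argument list::

    import sys
    from pkg_resources import load_entry_point

    def run_oxli(argv):
        # resolve the target of 'oxli = <module>:<func>' in khmer's
        # metadata, then call it as the installed 'oxli' command would
        sys.argv = ['oxli'] + list(argv)
        return load_entry_point('khmer', 'console_scripts', 'oxli')()

    if __name__ == '__main__':
        sys.exit(run_oxli(sys.argv[1:]))
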
diff --git a/scripts/partition-graph.py b/scripts/partition-graph.py
index 73666e2..bb03c0b 100755
--- a/scripts/partition-graph.py
+++ b/scripts/partition-graph.py
@@ -24,7 +24,7 @@ import argparse
 import khmer
 import sys
 from khmer.khmer_args import (add_threading_args, info)
-from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_input_files
 
 # Debugging Support
 import re
@@ -111,8 +111,6 @@ def main():
     for _ in filenames:
         check_input_files(_, args.force)
 
-    check_space(filenames, args.force)
-
     print('--', file=sys.stderr)
     print('SUBSET SIZE', args.subset_size, file=sys.stderr)
     print('N THREADS', args.threads, file=sys.stderr)
diff --git a/scripts/readstats.py b/scripts/readstats.py
index d8e995f..570d8cf 100755
--- a/scripts/readstats.py
+++ b/scripts/readstats.py
@@ -47,9 +47,10 @@ def get_parser():
     return parser
 
 
-class StatisticsOutput(object):
-    #  pylint: disable=too-few-public-methods
-    """Output statistics for several data files.
+class StatisticsOutput(object):  # pylint: disable=too-few-public-methods
+
+    """
+    Output statistics for several data files.
 
     The format of the output is determined by the formatter used.
     All statistics are aggregated and a summary is added to the data.
@@ -63,8 +64,7 @@ class StatisticsOutput(object):
         return self
 
     def append(self, basepairs, seqs, filename):
-        """Append a new line for the given basepair number, sequences and file.
-        """
+        """Append a new line for the given basepair num, sequences and file."""
         self.formatter.append(
             basepairs, seqs, basepairs / float(seqs), filename)
 
@@ -74,7 +74,9 @@ class StatisticsOutput(object):
 
 
 class CsvFormatter(object):
+
     """Format the statistis information as CSV."""
+
     headers = ['bp', 'seqs', 'avg_len', 'filename']
 
     def __init__(self, underlying_file):
@@ -89,12 +91,12 @@ class CsvFormatter(object):
         self.file.writerow([basepairs, seqs, "%.1f" % avg_len, filename])
 
     def finalize(self):
-        """No statistics since the CSV data is supposed to be processed further.
-        """
+        """No statistics since the CSV data is to be processed further."""
         pass
 
 
 class StdFormatter(object):
+
     """Format the statistics in a human readable string."""
 
     def __init__(self, underlying_file):
@@ -128,7 +130,7 @@ def analyze_file(filename):
     """Run over the given file and count base pairs and sequences."""
     bps = 0
     seqs = 0
-    input_iter = screed.open(filename, parse_description=False)
+    input_iter = screed.open(filename)
     for record in input_iter:
         if seqs % 100000 == 0:
             print('...', filename, seqs, file=sys.stderr)
diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py
index 79b4777..72b5bed 100755
--- a/scripts/sample-reads-randomly.py
+++ b/scripts/sample-reads-randomly.py
@@ -27,7 +27,7 @@ import textwrap
 import sys
 
 import khmer
-from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_input_files
 from khmer.khmer_args import info
 from khmer.utils import write_record, broken_paired_reader
 
@@ -85,8 +85,6 @@ def main():
     for _ in args.filenames:
         check_input_files(_, args.force)
 
-    check_space(args.filenames, args.force)
-
     # seed the random number generator?
     if args.random_seed:
         random.seed(args.random_seed)
@@ -104,10 +102,16 @@ def main():
             sys.stderr.write(
                 "Error: cannot specify -o with more than one sample.")
             if not args.force:
+                print("NOTE: This can be overridden using the --force"
+                      " argument", file=sys.stderr)
                 sys.exit(1)
         output_filename = output_file.name
     else:
         filename = args.filenames[0]
+        if filename in ('/dev/stdin', '-'):
+            print("Accepting input from stdin; output filename must "
+                  "be provided with '-o'.", file=sys.stderr)
+            sys.exit(1)
         output_filename = os.path.basename(filename) + '.subset'
 
     if num_samples == 1:
@@ -131,7 +135,7 @@ def main():
     # read through all the sequences and load/resample the reservoir
     for filename in args.filenames:
         print('opening', filename, 'for reading', file=sys.stderr)
-        screed_iter = screed.open(filename, parse_description=False)
+        screed_iter = screed.open(filename)
 
         for count, (_, ispair, rcrd1, rcrd2) in enumerate(broken_paired_reader(
                 screed_iter,
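
The load/resample step above is reservoir sampling: each new record
replaces a random slot with probability N/(records seen so far), which
yields a uniform sample of size N without knowing the stream length in
advance. A minimal single-reservoir sketch (the script itself samples read
pairs and keeps one reservoir per requested sample)::

    import random

    def reservoir_sample(items, size, seed=None):
        rng = random.Random(seed)
        reservoir = []
        for count, item in enumerate(items):
            if count < size:
                reservoir.append(item)
            else:
                # keep this item with probability size/(count + 1) by
                # overwriting a uniformly chosen slot
                slot = rng.randint(0, count)
                if slot < size:
                    reservoir[slot] = item
        return reservoir
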
diff --git a/scripts/split-paired-reads.py b/scripts/split-paired-reads.py
index e9eac94..b77144a 100755
--- a/scripts/split-paired-reads.py
+++ b/scripts/split-paired-reads.py
@@ -98,10 +98,10 @@ def main():
     check_space(filenames, args.force)
 
     # decide where to put output files - specific directory? or just default?
-    if infile == '/dev/stdin' or infile == '-':
+    if infile in ('/dev/stdin', '-'):
         if not (args.output_first and args.output_second):
-            print >>sys.stderr, ("Accepting input from stdin; "
-                                 "output filenames must be provided.")
+            print("Accepting input from stdin; output filenames must "
+                  "be provided.", file=sys.stderr)
             sys.exit(1)
     elif args.output_directory:
         if not os.path.exists(args.output_directory):
@@ -130,7 +130,7 @@ def main():
     counter2 = 0
     index = None
 
-    screed_iter = screed.open(infile, parse_description=False)
+    screed_iter = screed.open(infile)
 
     # walk through all the reads in broken-paired mode.
     paired_iter = broken_paired_reader(screed_iter)
@@ -165,8 +165,8 @@ def main():
 
     print("DONE; split %d sequences (%d left, %d right)" %
           (counter1 + counter2, counter1, counter2), file=sys.stderr)
-    print("/1 reads in %s" % out1, file=sys.stderr)
-    print("/2 reads in %s" % out2, file=sys.stderr)
+    print("left (/1) reads in %s" % out1, file=sys.stderr)
+    print("right (/2) reads in %s" % out2, file=sys.stderr)
 
 if __name__ == '__main__':
     main()
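
broken_paired_reader(), used here and in several other scripts touched by
this patch, yields 4-tuples of (index, is_pair, read1, read2), with read2
set to None for orphaned reads. A minimal consumer sketch with placeholder
filenames::

    import screed
    from khmer.utils import broken_paired_reader, write_record

    with open('out.1.fq', 'w') as out1, open('out.2.fq', 'w') as out2:
        for index, is_pair, read1, read2 in broken_paired_reader(
                screed.open('reads.fq')):
            write_record(read1, out1)
            if is_pair:
                write_record(read2, out2)
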
diff --git a/scripts/trim-low-abund.py b/scripts/trim-low-abund.py
index 741b181..ddfc43a 100755
--- a/scripts/trim-low-abund.py
+++ b/scripts/trim-low-abund.py
@@ -28,7 +28,7 @@ from screed import Record
 from khmer import khmer_args
 
 from khmer.khmer_args import (build_counting_args, info, add_loadhash_args,
-                              report_on_config)
+                              report_on_config, calculate_tablesize)
 from khmer.utils import write_record, write_record_pair, broken_paired_reader
 from khmer.kfile import (check_space, check_space_for_hashtable,
                          check_valid_file_exists)
@@ -126,7 +126,14 @@ def main():
     check_valid_file_exists(args.input_filenames)
     check_space(args.input_filenames, args.force)
     if args.savetable:
-        check_space_for_hashtable(args, 'countgraph', args.force)
+        tablesize = calculate_tablesize(args, 'countgraph')
+        check_space_for_hashtable(args.savetable, tablesize, args.force)
+
+    if ('-' in args.input_filenames or '/dev/stdin' in args.input_filenames) \
+       and not args.out:
+        print("Accepting input from stdin; output filename must "
+              "be provided with -o.", file=sys.stderr)
+        sys.exit(1)
 
     if args.loadtable:
         print('loading countgraph from', args.loadtable, file=sys.stderr)
@@ -164,7 +171,7 @@ def main():
 
         pass2list.append((filename, pass2filename, trimfp))
 
-        screed_iter = screed.open(filename, parse_description=False)
+        screed_iter = screed.open(filename)
         pass2fp = open(pass2filename, 'w')
 
         save_pass2 = 0
@@ -260,8 +267,7 @@ def main():
         # so pairs will stay together if not orphaned.  This is in contrast
         # to the first loop.
 
-        for n, read in enumerate(screed.open(pass2filename,
-                                             parse_description=False)):
+        for n, read in enumerate(screed.open(pass2filename)):
             if n % 10000 == 0:
                 print('... x 2', n, pass2filename,
                       written_reads, written_bp, file=sys.stderr)
@@ -301,14 +307,18 @@ def main():
     percent_reads_trimmed = float(trimmed_reads + (n_reads - written_reads)) /\
         n_reads * 100.0
 
-    print('read %d reads, %d bp' % (n_reads, n_bp,))
-    print('wrote %d reads, %d bp' % (written_reads, written_bp,))
+    print('read %d reads, %d bp' % (n_reads, n_bp,), file=sys.stderr)
+    print('wrote %d reads, %d bp' % (written_reads, written_bp,),
+          file=sys.stderr)
     print('looked at %d reads twice (%.2f passes)' % (save_pass2_total,
-                                                      n_passes))
+                                                      n_passes),
+          file=sys.stderr)
     print('removed %d reads and trimmed %d reads (%.2f%%)' %
-          (n_reads - written_reads, trimmed_reads, percent_reads_trimmed))
+          (n_reads - written_reads, trimmed_reads, percent_reads_trimmed),
+          file=sys.stderr)
     print('trimmed or removed %.2f%% of bases (%d total)' %
-          ((1 - (written_bp / float(n_bp))) * 100.0, n_bp - written_bp))
+          ((1 - (written_bp / float(n_bp))) * 100.0, n_bp - written_bp),
+          file=sys.stderr)
 
     if args.variable_coverage:
         percent_reads_hicov = 100.0 * float(n_reads - skipped_n) / n_reads
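
The hunk above splits what used to be one call into two steps:
calculate_tablesize() turns the table-size arguments into a concrete
allocation, and check_space_for_hashtable() then checks the destination
path against that number. A minimal sketch of the new calling convention,
assuming an argparse namespace from build_counting_args()::

    from khmer.khmer_args import calculate_tablesize
    from khmer.kfile import check_space_for_hashtable

    def check_savetable_space(args):
        if args.savetable:
            tablesize = calculate_tablesize(args, 'countgraph')
            check_space_for_hashtable(args.savetable, tablesize, args.force)
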
diff --git a/sandbox/unique-kmers.py b/scripts/unique-kmers.py
similarity index 63%
rename from sandbox/unique-kmers.py
rename to scripts/unique-kmers.py
index aa78d88..ddf854c 100755
--- a/sandbox/unique-kmers.py
+++ b/scripts/unique-kmers.py
@@ -5,11 +5,12 @@
 # the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
-# pylint: disable=invalid-name,missing-docstring
+# pylint: disable=invalid-name,missing-docstring,no-member
 """
 Estimate number of unique k-mers, with precision <= ERROR_RATE.
 
-% python sandbox/unique-kmers.py [ -k <k size> ] [ -e <ERROR_RATE> ] <data1> <data2> ...
+% python scripts/unique-kmers.py [ -k <k size> ] [ -e <ERROR_RATE> ] <data1>
+<data2> ...
 
 Use '-h' for parameter help.
 """
@@ -22,24 +23,35 @@ import sys
 import textwrap
 
 import khmer
-from khmer.khmer_args import DEFAULT_K, info, ComboFormatter
+from khmer.khmer_args import (DEFAULT_K, info, ComboFormatter,
+                              _VersionStdErrAction)
+from khmer.utils import write_record
 from oxli.functions import optimal_args_output_gen as output_gen
 from khmer import __version__
 import screed
 
+
 def get_parser():
     descr = "Estimate number of unique k-mers, with precision <= ERROR_RATE."
     epilog = ("""
     A HyperLogLog counter is used to do cardinality estimation. Since this counter
     is based on a tradeoff between precision and memory consumption,
-    :option:`-e`/:option:`--error-rate` can be used to control how much
+    the :option:`-e`/:option:`--error-rate` can be used to control how much
     memory will be used. In practice the memory footprint is small even
     at low error rates (< 0.01).
 
     :option:`-k`/:option:`--ksize` should be set to the desired k-mer size.
 
-    Output is sent to STDOUT, but a report file can be generated with
-    :option:`-R`/:option:`--report`.
+    Informational output is sent to STDERR, but a report file can be generated
+    with :option:`-R`/:option:`--report`.
+
+    :option:`--stream-out` will write the input sequences to STDOUT.
+    This is useful for workflows: count unique k-mers in a stream, then do
+    digital normalization.
+
+    :option:`--diagnostics` will print recommended tablesize and memory
+    settings for various false positive rates. This is useful for
+    configuring other khmer scripts. The diagnostics are written to STDERR.
 
     Example::
 
@@ -47,15 +59,26 @@ def get_parser():
 
     Example::
 
-""" "        unique-kmers.py -R unique_count -k 30 tests/test-data/test-abund-read-paired.fa")  # noqa
+        unique-kmers.py -k 17 --diagnostics tests/test-data/test-abund-read.fa
+
+    Example::
+
+        unique-kmers.py --stream-out -k 17 tests/test-data/test-reads.fa | \\
+        normalize-by-median.py -k 17 -o normalized /dev/stdin
+
+    Example::
+
+        unique-kmers.py -R unique_count -k 30 \\
+        tests/test-data/test-abund-read-paired.fa""")  # noqa
     parser = argparse.ArgumentParser(
         description=descr, epilog=textwrap.dedent(epilog),
         formatter_class=ComboFormatter)
 
     env_ksize = os.environ.get('KHMER_KSIZE', DEFAULT_K)
 
-    parser.add_argument('--version', action='version',
+    parser.add_argument('--version', action=_VersionStdErrAction,
                         version='khmer {v}'.format(v=__version__))
+
     parser.add_argument('-q', '--quiet', dest='quiet', default=False,
                         action='store_true')
 
@@ -65,15 +88,21 @@ def get_parser():
     parser.add_argument('--error-rate', '-e', type=float, default=0.01,
                         help='Acceptable error rate')
 
-    parser.add_argument('-R', '--report',
-                        metavar='filename', type=argparse.FileType('w'))
+    parser.add_argument('--report', '-R',
+                        metavar='filename', type=argparse.FileType('w'),
+                        help='generate informational report and write to'
+                        ' filename')
 
     parser.add_argument('--stream-out', '-S', default=False,
-                        action='store_true')
+                        action='store_true',
+                        help='write input sequences to STDOUT')
 
-    parser.add_argument('input_filenames', metavar='input_sequence_filename',
-                        help='Input FAST[AQ] sequence filename.', nargs='+')
+    parser.add_argument('--diagnostics', default=False, action='store_true',
+                        help='print out recommended tablesize arguments and '
+                             'restrictions')
 
+    parser.add_argument('input_filenames', metavar='input_sequence_filename',
+                        help='Input FAST[AQ] sequence filename(s).', nargs='+')
 
     return parser
 
@@ -108,9 +137,10 @@ def main():
     print('Total estimated number of unique {0}-mers: {1}'.format(
           args.ksize, cardinality),
           file=sys.stderr)
-    
+
     to_print = output_gen(cardinality, args.error_rate)
-    print(to_print)
+    if args.diagnostics:
+        print(to_print, file=sys.stderr)
 
     if report_fp:
         print(cardinality, args.ksize, 'total', file=report_fp)
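
unique-kmers.py is built on the HyperLogLog counter described in its
epilog. A minimal sketch of the underlying API; the exact constructor
signature here is an assumption and may differ between releases::

    import khmer

    # error rate ~1%, k-mer size 20 (assumed argument order)
    hll = khmer.HLLCounter(0.01, 20)
    hll.consume_string('ACGTACGTACGTACGTACGTACGT')
    print(hll.estimate_cardinality())  # estimated number of unique 20-mers
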
diff --git a/setup.py b/setup.py
index c9d785e..d69c7b8 100755
--- a/setup.py
+++ b/setup.py
@@ -106,7 +106,7 @@ BUILD_DEPENDS.extend(path_join("lib", bn + ".hh") for bn in [
 
 SOURCES = ["khmer/_khmer.cc"]
 SOURCES.extend(path_join("lib", bn + ".cc") for bn in [
-    "trace_logger", "perf_metrics", "read_parsers", "kmer_hash", "hashtable",
+    "read_parsers", "kmer_hash", "hashtable",
     "hashbits", "labelhash", "counting", "subset", "read_aligner",
     "hllcounter"])
 
diff --git a/tests/khmer_tst_utils.py b/tests/khmer_tst_utils.py
index 0cbe36c..13edeb3 100644
--- a/tests/khmer_tst_utils.py
+++ b/tests/khmer_tst_utils.py
@@ -15,6 +15,7 @@ import traceback
 import subprocess
 from io import open
 
+
 try:
     from StringIO import StringIO
 except ImportError:
@@ -52,7 +53,27 @@ def cleanup():
     cleanup_list = []
 
 
+def scriptpath(scriptname='interleave-reads.py'):
+    "Return the path to the scripts, in both dev and install situations."
+
+    # note - it doesn't matter what the scriptname is here, as long as
+    # it's some khmer script present in this version of khmer.
+
+    path = os.path.join(os.path.dirname(__file__), "../scripts")
+
+    if os.path.exists(os.path.join(path, scriptname)):
+        return path
+
+    for path in os.environ['PATH'].split(':'):
+        if os.path.exists(os.path.join(path, scriptname)):
+            return path
+
+
 def _runscript(scriptname, sandbox=False):
+    """
+    Find & run a script with exec (i.e. not via os.system or subprocess).
+    """
+
     import pkg_resources
     ns = {"__name__": "__main__"}
     ns['sys'] = globals()['sys']
@@ -63,17 +84,16 @@ def _runscript(scriptname, sandbox=False):
         return 0
     except pkg_resources.ResolutionError as err:
         if sandbox:
-            paths = [os.path.join(os.path.dirname(__file__), "../sandbox")]
+            path = os.path.join(os.path.dirname(__file__), "../sandbox")
         else:
-            paths = [os.path.join(os.path.dirname(__file__),
-                                  "../scripts")]
-            paths.extend(os.environ['PATH'].split(':'))
-        for path in paths:
-            scriptfile = os.path.join(path, scriptname)
+            path = scriptpath()
+
+        scriptfile = os.path.join(path, scriptname)
+        if os.path.isfile(scriptfile):
             if os.path.isfile(scriptfile):
                 exec(compile(open(scriptfile).read(), scriptfile, 'exec'), ns)
                 return 0
-        if sandbox:
+        elif sandbox:
             raise nose.SkipTest("sandbox tests are only run in a repository.")
 
     return -1
@@ -84,7 +104,8 @@ def runscript(scriptname, args, in_directory=None,
     """Run a Python script using exec().
 
     Run the given Python script, with the given args, in the given directory,
-    using 'execfile'.
+    using 'exec'.  Mimic proper shell functionality with argv, and capture
+    stdout and stderr.
 
     When using :attr:`fail_ok`=False in tests, specify the expected error.
     """
@@ -131,48 +152,37 @@ def runscript(scriptname, args, in_directory=None,
     return status, out, err
 
 
-def runscriptredirect(scriptname, args, stdinfilename, in_directory=None,
-                      fail_ok=False, sandbox=False):
-    """Run a Python script using subprocess().
-
-    Run the given Python script, with the given args, in the given directory,
-    using 'subprocess'.
-    """
+def run_shell_cmd(cmd, fail_ok=False, in_directory=None):
     cwd = os.getcwd()
+    if in_directory:
+        os.chdir(in_directory)
 
-    status = -1
+    print('running:', cmd)
+    try:
+        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        (out, err) = p.communicate()
 
-    if sandbox:
-        paths = [os.path.join(os.path.dirname(__file__), "../sandbox")]
-    else:
-        paths = [os.path.join(os.path.dirname(__file__), "../scripts")]
-        paths.extend(os.environ['PATH'].split(':'))
-    for path in paths:
-        scriptfile = os.path.join(path, scriptname)
-        if os.path.isfile(scriptfile):
-            if in_directory:
-                os.chdir(in_directory)
-            sysargs = 'cat ' + stdinfilename + ' | python ' + scriptfile + \
-                " " + args
-            out = open(
-                os.path.join(in_directory, "out"), 'w+', encoding='utf-8')
-            err = open(
-                os.path.join(in_directory, "err"), 'w+', encoding='utf-8')
-            print('running:', scriptname, 'in:', in_directory)
-            print('arguments', sysargs)
-            status = subprocess.call(args=sysargs, stdout=out, stderr=err,
-                                     shell=True)
-            os.chdir(cwd)
-            if status != 0 and not fail_ok:
-                out.seek(0)
-                out = out.read()
-                err.seek(0)
-                err = err.read()
-                print(out)
-                print(err)
-                assert False, (status, out, err)
-
-            return status, out, err
+        out = out.decode('utf-8')
+        err = err.decode('utf-8')
 
-        if sandbox:
-            raise nose.SkipTest("sandbox tests are only run in a repository.")
+        if p.returncode != 0 and not fail_ok:
+            print('out:', out)
+            print('err:', err)
+            raise AssertionError("exit code is non-zero: %d" % p.returncode)
+
+        return (p.returncode, out, err)
+    finally:
+        os.chdir(cwd)
+
+
+def longify(listofints):
+    """List of ints => list of longs, only on py2.
+
+    Takes a list of numeric types, and returns longs on python2, or the
+    original list on python3.
+    """
+    # For map(long, [list of ints]) cross-version hackery
+    if sys.version_info.major < 3:
+        return map(long, listofints)
+    return listofints
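
The new run_shell_cmd() helper returns decoded text rather than bytes and
raises on a non-zero exit status unless fail_ok is set. A short usage
sketch, as a test might call it::

    status, out, err = run_shell_cmd('echo hello')
    assert status == 0
    assert out.strip() == 'hello'
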
diff --git a/tests/test-data/empty-file.bz2 b/tests/test-data/empty-file.bz2
new file mode 100644
index 0000000..b56f3b9
Binary files /dev/null and b/tests/test-data/empty-file.bz2 differ
diff --git a/tests/test-data/empty-file.gz b/tests/test-data/empty-file.gz
new file mode 100644
index 0000000..0fad667
Binary files /dev/null and b/tests/test-data/empty-file.gz differ
diff --git a/tests/test-data/paired-broken4.fq.1 b/tests/test-data/paired-broken4.fq.1
new file mode 100644
index 0000000..a13d9b4
--- /dev/null
+++ b/tests/test-data/paired-broken4.fq.1
@@ -0,0 +1,4 @@
+@SRR797058.3 HWI-ST600:227:C0WR4ACXX:7:1101:17167:2000/1
+NCTACCAAAAAAATGCCCGATAATTCTGACCATTCCTTCCTCATTCTCGTCTGGCGTTTGGTCACGACGCACGATACCTTCTGCACTTGTCAAGACAGCGG
++
+#00@#################################################################################################
diff --git a/tests/test-data/paired-broken4.fq.2 b/tests/test-data/paired-broken4.fq.2
new file mode 100644
index 0000000..b37161b
--- /dev/null
+++ b/tests/test-data/paired-broken4.fq.2
@@ -0,0 +1,4 @@
+@SRR797058.3 HWI-ST600:227:C0WR4ACXX:7:1101:17167:9999/2
+CTTGACAAGAGCAGAAGTTATCTTGCCTCGGGACCAAACGCCAGACGAGCACGAGGGAGCGATCGTCCGCATTAGCCGGCATTCTTTTGCTAGCAGATCGG
++
+=?###################################################################################################
diff --git a/tests/test-data/paired.fq.2 b/tests/test-data/paired.fq.2
index 4b0ed7b..4ff9afa 100644
--- a/tests/test-data/paired.fq.2
+++ b/tests/test-data/paired.fq.2
@@ -10,4 +10,3 @@ GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCC
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-
diff --git a/tests/test-data/readaligner-default.json b/tests/test-data/readaligner-default.json
new file mode 100644
index 0000000..c97e8da
--- /dev/null
+++ b/tests/test-data/readaligner-default.json
@@ -0,0 +1,50 @@
+{
+    "scoring_matrix": [
+        -0.06642736173897607,
+        -4.643856189774724,
+        -7.965784284662087,
+        -9.965784284662087
+    ],
+    "transition_probabilities": [
+        [
+            -0.021973842014145723,
+            -13.73189622448781,
+            -14.869792371737484,
+            -6.058239815501842,
+            -19.166033822961197,
+            -21.66853416349038
+        ],
+        [
+            -0.9444728000497686,
+            -1.105331993785005,
+            -7.40360292819022,
+            -6.690896473084504
+        ],
+        [
+            -0.3937937393192493,
+            -2.123673467366609,
+            -7.104364821496794,
+            -8.864604875515933
+        ],
+        [
+            -3.645644436080496,
+            -15.220073662674086,
+            -12.411146320797728,
+            -0.1263680454390087,
+            -8.227232598141855,
+            -11.226627458948142
+        ],
+        [
+            -2.8013509614037972,
+            -8.078453985883888,
+            -1.9197909720107271,
+            -0.7647513448614925
+        ],
+        [
+            -2.8525098984653257,
+            -4.535070816966942,
+            -0.6522388852285496,
+            -2.457038730417613
+        ]
+    ]
+}
\ No newline at end of file
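
The values in this fixture appear to be log2 probabilities: exponentiating
the scoring_matrix entries gives roughly 0.955, 0.040, 0.004 and 0.001,
which sum to ~1. A quick sanity check under that assumption::

    import json

    with open('tests/test-data/readaligner-default.json') as fh:
        params = json.load(fh)

    total = sum(2 ** x for x in params['scoring_matrix'])
    print(total)  # ~1.0 if the entries are log2 probabilities
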
diff --git a/tests/test-data/readaligner-k12.json b/tests/test-data/readaligner-k12.json
new file mode 100644
index 0000000..96e594f
--- /dev/null
+++ b/tests/test-data/readaligner-k12.json
@@ -0,0 +1,50 @@
+{
+    "scoring_matrix": [
+        -0.06642736173897607,
+        -4.643856189774724,
+        -7.965784284662087,
+        -9.965784284662087
+    ],
+    "transition_probabilities": [
+        [
+            -0.026682951271565506,
+            -9.914804535510426,
+            -12.141787036607257,
+            -10.397090021035718,
+            -18.93814224957318,
+            -21.192698501482667
+        ],
+        [
+            -1.3469970008036818,
+            -0.8268851828081922,
+            -10.50575629769457,
+            -9.368461466993008
+        ],
+        [
+            -0.4341897229177519,
+            -2.230759666245372,
+            -9.828796523112176,
+            -11.63615144516978
+        ],
+        [
+            -0.015678000180557785,
+            -13.181473677755502,
+            -14.67136517171856,
+            -6.70130751161339,
+            -15.726469771159012,
+            -18.409279595278313
+        ],
+        [
+            -0.03221351929242559,
+            -6.011510033472284,
+            -8.44845478869957,
+            -8.089881097025156
+        ],
+        [
+            -0.026272943646111175,
+            -6.421067096717085,
+            -7.773052425460627,
+            -9.561548320266915
+        ]
+    ]
+}
diff --git a/tests/test-data/test-fastq-reads.fa b/tests/test-data/test-fastq-reads.fa
new file mode 100644
index 0000000..4bd6eca
--- /dev/null
+++ b/tests/test-data/test-fastq-reads.fa
@@ -0,0 +1,200 @@
+>895:1:1:1246:14654 1:N:0:NNNNN
+CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT
+>895:1:1:1248:9583 1:N:0:NNNNN
+ACTGGGCGTAGACGGTGTCCTCATCGGCACCAGC
+>895:1:1:1252:19493 1:N:0:NNNNN
+CCGGCGTGGTTGGTGAGGTCACTGAGCTTCATGTC
+>895:1:1:1255:18861 1:N:0:NNNNN
+ACGACGAGAAGCTGATCTACCGCGCCGAGCGCATC
+>895:1:1:1264:15854 1:N:0:NNNNN
+CGTGATGATGTGCTTGCGGCCGGAGGGCCTGTTGCCCAGG
+>895:1:1:1265:2265 1:N:0:NNNNN
+TATAGCGTGAGGCGATGACGTTGCTGTCCTTGGCGCGGC
+>895:1:1:1273:17782 1:N:0:NNNNN
+TCGAAAATCACGTGGGAGATGCACTATCACGCGGTCGGTGAGGAAGTGACCGACCACACCGAGCTCGC
+>895:1:1:1274:18571 1:N:0:NNNNN
+AGCAGGCGAACAGCACGCCGAACAATACTGTCTTCATGCCAAACTGCTGAAAGCCGAGCACAGCAGAAATGCTCCAGAG
+>895:1:1:1276:16426 1:N:0:NNNNN
+GCAGGTATTGGTTTGCCTAACGTTGAAATTGCAGGATTAACG
+>895:1:1:1283:17864 1:N:0:NNNNN
+ATTCGTCAACCCGCGGCTCGAGCTGCGCATCC
+>895:1:1:1287:13756 1:N:0:NNNNN
+AGGGGAAATCCATTTCAAAGCGTTCGTGATCACGATAGACCGTCACTAAGCCACTGACTGTATGGAAGCAAAC
+>895:1:1:1290:11501 1:N:0:NNNNN
+CCAATCACTGCGATCGGCGCACGGACCTTGGAGCCGGAGCAG
+>895:1:1:1294:5882 1:N:0:NNNNN
+GGCATTGACTATGGGATCCAAGCGATTGCACATG
+>895:1:1:1295:6189 1:N:0:NNNNN
+GTCTAATCTTCGAGCAACTCCACGCTGTAGTATCATCAAGGTTCAAGATGTTAATGAATAACAATTGCGCAGCAA
+>895:1:1:1295:5208 1:N:0:NNNNN
+GCGCATCGCGCGGCTTGAGGACGCTGTCTTCAAGC
+>895:1:1:1296:1784 1:N:0:NNNNN
+GATGTTGAATTTTTACATCAACCGTGCCGGC
+>895:1:1:1296:6541 1:N:0:NNNNN
+AAACAGCTCTTCGGATTTAAAGATTACGCAGGACATCCGGCAGGCACTAGCGAAAGACAGGGAGCTGTCCACGACGGCGAAGCAC
+>895:1:1:1298:13380 1:N:0:NNNNN
+AGCTGAGCGGGCACATCTTCTTCAAGCACCGCTACTTCGGCTTCGATGACGCGG
+>895:1:1:1299:3449 1:N:0:NNNNN
+TGGCGCGCGCTTCAAATACGAAAACTTGAAGCTCGGCTGGAGGGC
+>895:1:1:1300:2738 1:N:0:NNNNN
+GTCCGTGCAAAGGGCACTACGTTTGAAGTCCCAGCAATTTCGGGCAGTTCACCGGCACTTT
+>895:1:1:1303:6251 2:N:0:NNNNN
+CGAGATTACAGGTGGGATTCCCGAACCGATATTTGGTTAACCTAAGGAGGCTGAG
+>895:1:1:1303:14389 2:N:0:NNNNN
+GTCGAGTCCGACAACCCGATCATGCAGCCGCGCCATGCCCGCTTGCCAG
+>895:1:1:1306:15360 2:N:0:NNNNN
+ACTTTACCTGCTCGGCACCTCCATCGCGCGACCTCTGATTGCCAA
+>895:1:1:1307:4308 2:N:0:NNNNN
+GAGTTAAAATGCCTGCCGCGTGCCCGGCCATGACAAAAATCTTTCTGCTTCGCACATCGC
+>895:1:1:1308:20421 2:N:0:NNNNN
+CAGATGCCCGGCTGCTTTTTCCGGATCCGAAATCCTATTATGGCATTCCGCTCGATCCGC
+>895:1:1:1308:2539 1:N:0:NNNNN
+TTCCGGCGCCACGCGGACAGATCACGGACCGCAACGGAGTGCCGCTGGCG
+>895:1:1:1308:2539 2:N:0:NNNNN
+CGGCAGCTTGTCCTTGATTCCCTCGTACTCCTGCGGCGTGAGATTTGGAGCGATCTCAAACGGCATG
+>895:1:1:1309:4153 2:N:0:NNNNN
+GGACGCGCACGCCGGTGAGGAACGACCCGCCCGCGCGGGCGTGGTCGCCGGGGCCGTCC
+>895:1:1:1312:10985 2:N:0:NNNNN
+GAGCGCGTCGAGCCCCGCGTGGTCGACCGCGAGCCGTTCGAGCGACCGCCAGTGGCTCCGG
+>895:1:1:1314:10430 2:N:0:NNNNN
+CCTGACTCTCGGCCTGGCGCTTACTTTCGCGCCTGGAAGCCTCACGACACAGCAGAAGTCTGCCTGGCGCAC
+>895:1:1:1318:10532 1:N:0:NNNNN
+TACGCTATCAACGGGTTCGGTCGTATCGGGCGCAACGTGCTTCGCGCCATGACGCAAAAACAAGTCGAGCGGGTTCGCGCAATCAATGACCTCACTGACACGCGCAC
+>895:1:1:1318:10532 2:N:0:NNNNN
+ATTCGAGCACAACATCGACATCCAATTTTTTCCACGGCAAATTCGCCGGGTCGCGCTCC
+>895:1:1:1320:11648 2:N:0:NNNNN
+AACGATTCGAACACGGTAATTGCCGTTTTCGGCGGCGACTTCGAAGGCGGCG
+>895:1:1:1325:8700 1:N:0:NNNNN
+CAGCTGCAGCAATTCGCCGTCGTGGGTTTCGATTGGGGCGGCCGCGCCGC
+>895:1:1:1326:7273 1:N:0:NNNNN
+TATCAATACTGGCAACGCAGGTGTTGCTGGCGAGGCCAACACTATCCGTATCGG
+>895:1:1:1326:7273 2:N:0:NNNNN
+GTGGGATGCCTTCGGGATCAACCTCTTTTTTATAACGGAACGTGACCGGTTTGAGCGCGTGAATA
+>895:1:1:1327:13028 2:N:0:NNNNN
+ACTAAGTGGCGTATACCCTTCCATTTCAACTCGCAGTTGATGGTTACCCTGAGCGAGAGACTCCAGCCGAAGCCGGGTTTGTCCCTTGAGTTCTCCATCAATAAAAACTCTGGC
+>895:1:1:1327:15301 2:N:0:NNNNN
+TCAGTCGTCGAACGCTATACCGGTCAACTCGTCTGTCGAGCCGG
+>895:1:1:1328:3708 2:N:0:NNNNN
+CCGAGGCTGAGGGCGAAAGGCAACAATGAGATCGTCGACATCGTT
+>895:1:1:1330:9540 2:N:0:NNNNN
+TCCTCCATAGAGTGTTCCGAAGTAAGGTGCCAACGGGTCTTCCTTCGGGAGACTTGCCTGGTATTTTGCCAGCTTCTGGTGAACCGAATCC
+>895:1:1:1331:1766 1:N:0:NNNNN
+TGCTGCTGATCGGCTGTGCCCACGAACTTTCTGGC
+>895:1:1:1333:2512 1:N:0:NNNNN
+TGGCGAGAGTCGCCTCAGGGCGGCATATCTTTGAGGCGAATGGGAATCGGTTTGCCCTGTTCCT
+>895:1:1:1333:2512 2:N:0:NNNNN
+CTTACGCCTTGTCTTTGAGCGCCGTGGAGCTCGGATGGCGACGCCTGCGC
+>895:1:1:1333:2445 2:N:0:NNNNN
+AGATTGTCGGAGCGCCAGTTCGAGCTGGCGTGCTCTGCCGCCCCCTATGT
+>895:1:1:1334:19532 2:N:0:NNNNN
+AGTACTACGTCATCGAATCGCAGAATCCGAAGATGCAGGGCTTCGACGGCGCCGTGAT
+>895:1:1:1335:19932 1:N:0:NNNNN
+GACTCACCTCACAGTCTTCCAGGCCCGCGAAAGCAATGATAGGGCAATCGAGAGGCTCTTCAGTCGCATAAGTATACGTCTCACAGATCG
+>895:1:1:1335:19932 2:N:0:NNNNN
+CTATCCACCAGTTACCCACAGCCGAGTTCGTGGGAGAACTGCGTCGTCTCAATGGTACATCAGAG
+>895:1:1:1336:4956 1:N:0:NNNNN
+GCTCGGCGAGGTGTTCCGCCAGCGCACGACTGCGGAATGGGTCGATCTCCTC
+>895:1:1:1336:4956 2:N:0:NNNNN
+CGGTACTCGATCGGCGTGCCCGAGAAGCGCATCGGGCTCGC
+>895:1:1:1338:15407 1:N:0:NNNNN
+AGAAGAAGTCCCAGACGTCGCCCACCACCGGCACCGAACCGCC
+>895:1:1:1338:7557 2:N:0:NNNNN
+ATGTTAAACTCCGGTCGAACGGCCTTGGCACGGGCG
+>895:1:1:1338:6614 2:N:0:NNNNN
+CACAGGTCACTCAGCGGCTTATACAGCTTGCTCCGCCTTTCGCTCGGTCGAGAACACGATCTTTCCGTCACGCCTCATGG
+>895:1:1:1340:19387 2:N:0:NNNNN
+ATTTCGCGATTCTTGTGCTGGCTGCGCTCCATTTGGCAGGCTACCACCACGCCAGTGGGAAGATGCGTGATGCGCAC
+>895:1:1:1342:20695 2:N:0:NNNNN
+CGAGATCATAGGTGCGTTCGGCTTGATCTGGGCGAGCTGGCTTTCGAGATAGAACTTGCGCTCGTCG
+>895:1:1:1342:11001 1:N:0:NNNNN
+ATAACGTTATTGGCTGTCGTGACACCGCTGCCGGC
+>895:1:1:1342:11001 2:N:0:NNNNN
+TCACTTGTGGAGCGAACACGGCCAGCGGTGCTCGCGCACTCGCTAGCAACACAAGCGGTGCCAAAA
+>895:1:1:1344:1968 1:N:0:NNNNN
+TGACCTTTTGTTTGAGCAGGGTTCCGACTGCCTGGACGTCCTGCTTCTTGACCGCCTGGATCAATGAAGACTCGGCCG
+>895:1:1:1347:8723 1:N:0:NNNNN
+ACGGATCGATGCCGCGAGCGAGGAGGCGATCGACGAGCCGGTCATAGAAGTCCAGGCCTGCCGGGTTCACCGCGCCC
+>895:1:1:1347:8723 2:N:0:NNNNN
+GACGGCCGAGGGCCCTGCATCTGGGACAACTTTGT
+>895:1:1:1347:3237 1:N:0:NNNNN
+ACCACGTTCTTGACCTGCGCCTTGCCAACAGCCG
+>895:1:1:1347:3237 2:N:0:NNNNN
+TGGCGGTCGATCCCAAGCTCTACGAGGGCAGCTGGAGCGG
+>895:1:1:1348:18672 2:N:0:NNNNN
+AGAATAATTTCATAATCGGGATAGTTCAGCCTGCCTAAAGAATCCAGGC
+>895:1:1:1348:1257 1:N:0:NNNNN
+ATCGAATCGGATCTGGCAGCTTTTGCCAGTTGGTCGGCGG
+>895:1:1:1349:15165 2:N:0:NNNNN
+GAGCCGGGGGCCGAAGAAGCTGCGTTCCGTCGCCGGCAGTGGCTC
+>895:1:1:1349:21156 1:N:0:NNNNN
+ACCGCAAGTCGGCGACCGGCAAGTACGAAAAGACC
+>895:1:1:1349:13984 1:N:0:NNNNN
+GAAAGGATCAACTGCCGCTTCAAGTTCCAGAAACGCCGTCAGCTTTTCGTCGGCGCGCACAACGCAACGCTTCCC
+>895:1:1:1351:14718 1:N:0:NNNNN
+AGCGAACGAAGAGCCAAAAAAGCTCCACCTCTCGACAAG
+>895:1:1:1351:14718 2:N:0:NNNNN
+TTCAAAAGTTTCAGTGTGCAAGCGCCGTCGGAATAGCCACCACCTAAACCGGCACCTACCGGTATCACTTTATGTACATG
+>895:1:1:1352:5369 2:N:0:NNNNN
+ATCAAACCCCTCAATCCGCACCGCGGGCTTGCTGCCAACATTGGAAAGATGGCATCCTCCTTTCCTGGGGGATGGGGAG
+>895:1:1:1353:6642 1:N:0:NNNNN
+TGATTATGTTGCGAATGGGTCTGACGCTGCTGG
+>895:1:1:1355:13535 1:N:0:NNNNN
+AAGCGCGATGATCGCAACCGTGCTTTATTCCGACCTTTCCGGTGGCCGG
+>895:1:1:1355:13535 2:N:0:NNNNN
+CACTTTCACCATGAGCGCGTTCTTGCGTCGACAAATGGAGCAGTCGCAGGTGGTCAGTTCCGGGAAATCCGTATCGATCTCGAAGGTGACCGCGCCGCAG
+>895:1:1:1357:19736 1:N:0:NNNNN
+AAAAGCATTACCCAACCGAACACACCGGCTGCAAATAGCCCAACCGCAGCGAGGCCGTTTAATCGTTCATTCCGAC
+>895:1:1:1357:19736 2:N:0:NNNNN
+TTGGCCGCTTTTTAATTTGTTCGGATCGGTTTGCCAAAACGGGGATATTTGTCAAGCGGGAAACTTAGGAAAAATTTCTTAAGACTCATGCCTCCGTGTC
+>895:1:1:1357:4413 2:N:0:NNNNN
+CCCCGCCGATGACCGACAGGTTCTGTCCAGGTGCT
+>895:1:1:1358:4953 1:N:0:NNNNN
+AGAAGTTCGACCGCATGGTGAGCCGCTATGACCGCCAGGCAGATGGCTCGCTCAAAGAAGAACCGCGGG
+>895:1:1:1362:3983 2:N:0:NNNNN
+TCGATATCGCCATCTTTTAAACAGGCGATCGGCACAACTTTGAAACCCGCCATAACCGCGCTCGC
+>895:1:1:1363:9988 2:N:0:NNNNN
+TCTCGAAGTACCAACCCATCGAGCCGTAGGTGTGGCCGGGCAGGCGCAGGGCCTTGATCTTCATCCCC
+>895:1:1:1363:11839 1:N:0:NNNNN
+ACAAGTACACCCTGCGCACCGCGCGCGGGATGACGGTGGCGATC
+>895:1:1:1365:7390 2:N:0:NNNNN
+GCCCCAGCCCTTGAAGTCCGTGTGCCAGTGCGTGCGCGTCCAGATCGGCGGACC
+>895:1:1:1365:12527 2:N:0:NNNNN
+TCGTTGTCGTAGTCGCCCCAGATGCCCGGGCCGCCCACCTTGGCGACGCTCGTAAAGCGCTTGCCCATCTCGTT
+>895:1:1:1368:4434 2:N:0:NNNNN
+CCAAGGAAGAATTTAGAGGAGTTACGAGTCATTCTTCCTCCGGCGCCTTCTGCAACAGCTCGTGCAACAGCAACCTTGCTTTGCTCCAGTCCC
+>895:1:1:1373:4848 1:N:0:NNNNN
+CGGTTGCGACGAGCGAGTCGGAGCCGACACCGTCGAGGATCGTC
+>895:1:1:1373:13994 1:N:0:NNNNN
+GATTCGACATTGTTGACGGCGGTCGGGCAGCCGAACAGGCCGACATTGGCCGG
+>895:1:1:1373:13994 2:N:0:NNNNN
+AGGCTGCCGTCGATCAGGCTTATGAGGCCAAGCTGATCGGCAAGGACAATATCAACGGCTGGCCGTTCGAC
+>895:1:1:1376:16513 2:N:0:NNNNN
+CGAACGATTTATCGACCACGACGGCACGCGTTGGTACCACACCGGCGATCGCGTGCGCCG
+>895:1:1:1378:18986 1:N:0:NNNNN
+GACCAGCGGAACAACGGCAAAACTGAGCATCAAACTCAGGATCACCTGGCTAAGGATCAATAATTGAGCGGTTCCGCTTTCTCCCATCATGGCCG
+>895:1:1:1378:18986 2:N:0:NNNNN
+GCGGTCGCTCTCCTGGCCTCGGGTCAGAATTCCTCTCTGACCGGAACACTTGCCGGGCAG
+>895:1:1:1381:7062 2:N:0:NNNNN
+GCCGGCTCCCCACCGACGACAGCACGTACCCCGGCG
+>895:1:1:1381:4958 1:N:0:NNNNN
+ATCGAGGTGCACACCGCGAGCTTCCGCACGACGCGCGG
+>895:1:1:1381:4958 2:N:0:NNNNN
+AAGTGCTCGCGATAGGCCTCCCACAAGACGCCGCGGCGCGCGTAGGGCGATGAGATCCCGAGCAGCAGCGCG
+>895:1:1:1382:5012 2:N:0:NNNNN
+CCGATGTCGACGTCGCCGGTGTCGGCGGGCAGCC
+>895:1:1:1383:3089 2:N:0:NNNNN
+GGTCCGGTCTAAATCTTGTCCGGAGCCCAGATGATGAAATTGTCCCGGTTCGGCATCTTCACTTGCGGCA
+>895:1:1:1383:14728 1:N:0:NNNNN
+TGATCAACTTTGCTCTCCAGCCCGACCAGCAG
+>895:1:1:1383:6776 1:N:0:NNNNN
+CAATACGAACAAGTTCGTTGTTGGAGATACCGCGGAAGTCTACGACACGACTTCGCTCAACGTCCGCG
+>895:1:1:1384:20217 1:N:0:NNNNN
+CAGTGGCGACGACAAAAGCAAAGGGCCACGAGTTGTACGCCTGTTTGTTTTTGTCTCGGAATCCGGTGTGCATGATGTGTGT
+>895:1:1:1384:20217 2:N:0:NNNNN
+CCACATTCGTCCCAGTGAGAGACAAACCAAAAACCAAACGAACCTTTTGAGCCAGTTTGTGCCG
+>895:1:1:1386:7536 1:N:0:NNNNN
+CAGGCGGCGTAAGCCCGGCGTCGCGGTCACTGCGACGGCGCCGACGACGAGCGTGAGGGCGGCGTCGAGCGGC
+>895:1:1:1386:14753 1:N:0:NNNNN
+CAAGCCCATGCTCTACGCCGGCGGCTCATTCGTGCTCATGCCCGGCTACG
+>895:1:1:1388:11093 2:N:0:NNNNN
+AACCATGAGCAACCGGTTCGAGTGCGAGATCAGCAAAGTCGAAAA
diff --git a/tests/test_counting_hash.py b/tests/test_counting_hash.py
index 5e08aba..2e21119 100644
--- a/tests/test_counting_hash.py
+++ b/tests/test_counting_hash.py
@@ -19,12 +19,12 @@ import screed
 
 import nose
 from nose.plugins.attrib import attr
+from nose.tools import assert_raises
+
 
 MAX_COUNT = 255
 MAX_BIGCOUNT = 65535
 
-#
-
 # from http://www.rsok.com/~jrm/printprimes.html
 PRIMES_1m = [1000003, 1009837]
 PRIMES_100m = [100009979, 100000007]
@@ -574,7 +574,7 @@ def test_save_load_large():
         inpath = utils.get_test_data('random-20-a.fa')
         savepath = utils.get_temp_filename(ctfile)
 
-        sizes = khmer.get_n_primes_near_x(1, 2**31 + 1000)
+        sizes = khmer.get_n_primes_near_x(1, 2 ** 31 + 1000)
 
         orig = khmer._CountingHash(12, sizes)
         orig.consume_fasta(inpath)
@@ -605,8 +605,8 @@ def test_save_load():
     ht = khmer._CountingHash(12, sizes)
     try:
         ht.load(savepath)
-    except IOError as err:
-        assert 0, 'Should not produce an IOError: ' + str(err)
+    except OSError as err:
+        assert 0, 'Should not produce an OSError: ' + str(err)
 
     tracking = khmer._Hashbits(12, sizes)
     x = hi.abundance_distribution(inpath, tracking)
@@ -638,7 +638,7 @@ def test_load_truncated():
         try:
             ht = khmer.load_counting_hash(truncpath)
             assert 0, "this should not be reached!"
-        except IOError as err:
+        except OSError as err:
             print(str(err))
 
 
@@ -667,8 +667,8 @@ def test_load_gz():
     ht = khmer._CountingHash(12, sizes)
     try:
         ht.load(loadpath)
-    except IOError as err:
-        assert 0, "Should not produce an IOError: " + str(err)
+    except OSError as err:
+        assert 0, "Should not produce an OSError: " + str(err)
 
     tracking = khmer._Hashbits(12, sizes)
     x = hi.abundance_distribution(inpath, tracking)
@@ -694,8 +694,8 @@ def test_save_load_gz():
     ht = khmer._CountingHash(12, sizes)
     try:
         ht.load(savepath)
-    except IOError as err:
-        assert 0, 'Should not produce an IOError: ' + str(err)
+    except OSError as err:
+        assert 0, 'Should not produce an OSError: ' + str(err)
 
     tracking = khmer._Hashbits(12, sizes)
     x = hi.abundance_distribution(inpath, tracking)
@@ -707,6 +707,17 @@ def test_save_load_gz():
     assert x == y, (x, y)
 
 
+def test_load_empty_files():
+    def do_load_ct(fname):
+        with assert_raises(OSError):
+            ct = khmer.load_counting_hash(fname)
+
+    # Check empty files, compressed or not
+    for ext in ['', '.gz']:
+        fn = utils.get_test_data('empty-file' + ext)
+        do_load_ct(fn)
+
+
 def test_trim_full():
     hi = khmer.CountingHash(6, 1e6, 2)
 
@@ -866,8 +877,8 @@ def test_maxcount_with_bigcount_save():
     kh = khmer.CountingHash(1, 1, 1)
     try:
         kh.load(savepath)
-    except IOError as err:
-        assert 0, "Should not produce an IOError: " + str(err)
+    except OSError as err:
+        assert 0, "Should not produce an OSError: " + str(err)
 
     c = kh.get('AAAA')
     assert c == 1000, "should be able to count to 1000: %d" % c
@@ -885,8 +896,8 @@ def test_bigcount_save():
     kh = khmer.CountingHash(1, 1, 1)
     try:
         kh.load(savepath)
-    except IOError as err:
-        assert 0, "Should not produce an IOError: " + str(err)
+    except OSError as err:
+        assert 0, "Should not produce an OSError: " + str(err)
 
     # set_use_bigcount should still be True after load (i.e. should be saved)
 
@@ -909,8 +920,8 @@ def test_nobigcount_save():
     kh = khmer.CountingHash(1, 1, 1)
     try:
         kh.load(savepath)
-    except IOError as err:
-        assert 0, 'Should not produce an IOError: ' + str(err)
+    except OSError as err:
+        assert 0, 'Should not produce an OSError: ' + str(err)
 
     # set_use_bigcount should still be False after load (i.e. should be saved)
 
@@ -974,7 +985,11 @@ def test_get_ksize():
 
 def test_get_hashsizes():
     kh = khmer.CountingHash(22, 100, 4)
-    assert kh.hashsizes() == [97L, 89L, 83L, 79L], kh.hashsizes()
+    # Py2/3 hack, longify converts to long in py2, remove once py2 isn't
+    # supported any longer.
+    expected = utils.longify([97, 89, 83, 79])
+    assert kh.hashsizes() == expected, kh.hashsizes()
+
 
 # def test_collect_high_abundance_kmers():
 #    seqpath = utils.get_test_data('test-abund-read-2.fa')
@@ -983,9 +998,6 @@ def test_get_hashsizes():
 #    hb = kh.collect_high_abundance_kmers(seqpath, 2, 4)
 
 
-#
-
-
 def test_load_notexist_should_fail():
     savepath = utils.get_temp_filename('tempcountingsave0.ht')
 
@@ -993,7 +1005,7 @@ def test_load_notexist_should_fail():
     try:
         hi.load(savepath)
         assert 0, "load should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
@@ -1017,7 +1029,7 @@ def test_load_truncated_should_fail():
     try:
         hi.load(savepath)
         assert 0, "load should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
@@ -1028,7 +1040,7 @@ def test_load_gz_notexist_should_fail():
     try:
         hi.load(savepath)
         assert 0, "load should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
@@ -1052,7 +1064,7 @@ def test_load_gz_truncated_should_fail():
     try:
         hi.load(savepath)
         assert 0, "load should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
@@ -1064,7 +1076,7 @@ def test_counting_file_version_check():
     try:
         ht.load(inpath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
@@ -1076,7 +1088,7 @@ def test_counting_gz_file_version_check():
     try:
         ht.load(inpath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
@@ -1088,7 +1100,7 @@ def test_counting_file_type_check():
     try:
         kh.load(inpath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
@@ -1102,7 +1114,7 @@ def test_counting_gz_file_type_check():
     try:
         kh.load(inpath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
@@ -1130,7 +1142,7 @@ def test_consume_absentfasta():
     try:
         countingtable.consume_fasta("absent_file.fa")
         assert 0, "This should fail"
-    except IOError as err:
+    except OSError as err:
         print(str(err))
 
 
@@ -1145,7 +1157,7 @@ def test_consume_absentfasta_with_reads_parser():
         readparser = ReadParser(utils.get_test_data('empty-file'))
         countingtable.consume_fasta_with_reads_parser(readparser)
         assert 0, "this should fail"
-    except IOError as err:
+    except OSError as err:
         print(str(err))
     except ValueError as err:
         print(str(err))
@@ -1416,8 +1428,8 @@ def test_abund_dist_gz_bigcount():
     # load the compressed bigcount table
     try:
         counting_hash = khmer.load_counting_hash(outfile)
-    except IOError as err:
-        assert 0, 'Should not produce IOError: ' + str(err)
+    except OSError as err:
+        assert 0, 'Should not produce OSError: ' + str(err)
     hashsizes = counting_hash.hashsizes()
     kmer_size = counting_hash.ksize()
     tracking = khmer._Hashbits(kmer_size, hashsizes)
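
The blanket IOError -> OSError switch in these tests leans on PEP 3151: on
Python 3.3+ IOError is an alias of OSError, so except OSError still catches
I/O failures raised under either name. A quick check of that fact::

    import sys

    if sys.version_info >= (3, 3):
        assert IOError is OSError  # merged by PEP 3151
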
diff --git a/tests/test_counting_single.py b/tests/test_counting_single.py
index a76e63c..b7256c5 100644
--- a/tests/test_counting_single.py
+++ b/tests/test_counting_single.py
@@ -69,7 +69,7 @@ def test_hashtable_n_entries():
 
 
 def test_complete_no_collision():
-    kh = khmer._CountingHash(4, [4**4])
+    kh = khmer._CountingHash(4, [4 ** 4])
 
     for i in range(0, kh.n_entries()):
         s = khmer.reverse_hash(i, 4)
@@ -318,7 +318,7 @@ def test_very_short_read():
 class Test_ConsumeString(object):
 
     def setup(self):
-        self.kh = khmer._CountingHash(4, [4**4])
+        self.kh = khmer._CountingHash(4, [4 ** 4])
 
     def test_n_occupied(self):
         assert self.kh.n_occupied() == 0
diff --git a/tests/test_filter.py b/tests/test_filter.py
index 2ff9091..3af64ff 100644
--- a/tests/test_filter.py
+++ b/tests/test_filter.py
@@ -41,7 +41,7 @@ class Test_Filter(object):
         try:
             ht.consume_fasta("nonexistent")
             assert 0, "should fail"
-        except IOError as err:
+        except OSError as err:
             print(str(err))
         ht.output_fasta_kmer_pos_freq(filename, outname)
         try:
diff --git a/tests/test_functions.py b/tests/test_functions.py
index 32f3f99..bcc6739 100644
--- a/tests/test_functions.py
+++ b/tests/test_functions.py
@@ -14,7 +14,10 @@ from . import khmer_tst_utils as utils
 from khmer.utils import (check_is_pair, broken_paired_reader, check_is_left,
                          check_is_right)
 from khmer.kfile import check_input_files
-from cStringIO import StringIO
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO
 
 
 def test_forward_hash():
@@ -83,9 +86,7 @@ def test_get_primes_fal():
     try:
         primes = khmer.get_n_primes_near_x(5, 5)
         assert 0, "previous statement should fail"
-    except AssertionError:
-        raise
-    except Exception as err:
+    except RuntimeError as err:
         assert "unable to find 5 prime numbers < 5" in str(err)
 
 
diff --git a/tests/test_hashbits.py b/tests/test_hashbits.py
index 12a4f09..47b5928 100644
--- a/tests/test_hashbits.py
+++ b/tests/test_hashbits.py
@@ -1,12 +1,12 @@
 from __future__ import print_function
 from __future__ import absolute_import
 #
-# This file is part of khmer, https://github.com/dib-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# This file is part of khmer, https://github.com/dib-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
 # the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
-# pylint: disable=missing-docstring,protected-access
+# pylint: disable=missing-docstring,protected-access,no-member
 import khmer
 from khmer import ReadParser
 
@@ -31,79 +31,79 @@ def test_toobig():
 
 
 def test__get_set_tag_density():
-    ht = khmer._Hashbits(32, [1])
+    htableable = khmer._Hashbits(32, [1])
 
-    orig = ht._get_tag_density()
+    orig = htableable._get_tag_density()
     assert orig != 2
-    ht._set_tag_density(2)
-    assert ht._get_tag_density() == 2
+    htableable._set_tag_density(2)
+    assert htableable._get_tag_density() == 2
 
 
 def test_update_from():
-    ht = khmer.Hashbits(5, 1000, 4)
-    ht2 = khmer.Hashbits(5, 1000, 4)
+    htableable = khmer.Hashbits(5, 1000, 4)
+    other_htableable = khmer.Hashbits(5, 1000, 4)
 
-    assert ht.get('AAAAA') == 0
-    assert ht.get('GCGCG') == 0
-    assert ht2.get('AAAAA') == 0
-    assert ht2.get('GCGCG') == 0
+    assert htableable.get('AAAAA') == 0
+    assert htableable.get('GCGCG') == 0
+    assert other_htableable.get('AAAAA') == 0
+    assert other_htableable.get('GCGCG') == 0
 
-    ht2.count('AAAAA')
+    other_htableable.count('AAAAA')
 
-    assert ht.get('AAAAA') == 0
-    assert ht.get('GCGCG') == 0
-    assert ht2.get('AAAAA') == 1
-    assert ht2.get('GCGCG') == 0
+    assert htableable.get('AAAAA') == 0
+    assert htableable.get('GCGCG') == 0
+    assert other_htableable.get('AAAAA') == 1
+    assert other_htableable.get('GCGCG') == 0
 
-    ht.count('GCGCG')
+    htableable.count('GCGCG')
 
-    assert ht.get('AAAAA') == 0
-    assert ht.get('GCGCG') == 1
-    assert ht2.get('AAAAA') == 1
-    assert ht2.get('GCGCG') == 0
+    assert htableable.get('AAAAA') == 0
+    assert htableable.get('GCGCG') == 1
+    assert other_htableable.get('AAAAA') == 1
+    assert other_htableable.get('GCGCG') == 0
 
-    ht.update(ht2)
+    htableable.update(other_htableable)
 
-    assert ht.get('AAAAA') == 1
-    assert ht.get('GCGCG') == 1
-    assert ht2.get('AAAAA') == 1
-    assert ht2.get('GCGCG') == 0
+    assert htableable.get('AAAAA') == 1
+    assert htableable.get('GCGCG') == 1
+    assert other_htableable.get('AAAAA') == 1
+    assert other_htableable.get('GCGCG') == 0
 
 
 def test_update_from_diff_ksize_2():
-    ht = khmer.Hashbits(5, 1000, 4)
-    ht2 = khmer.Hashbits(4, 1000, 4)
+    htableable = khmer.Hashbits(5, 1000, 4)
+    other_htableable = khmer.Hashbits(4, 1000, 4)
 
     try:
-        ht.update(ht2)
+        htableable.update(other_htableable)
         assert 0, "should not be reached"
     except ValueError as err:
         print(str(err))
 
     try:
-        ht2.update(ht)
+        other_htableable.update(htableable)
         assert 0, "should not be reached"
     except ValueError as err:
         print(str(err))
 
 
 def test_update_from_diff_tablesize():
-    ht = khmer.Hashbits(5, 100, 4)
-    ht2 = khmer.Hashbits(5, 1000, 4)
+    htableable = khmer.Hashbits(5, 100, 4)
+    other_htableable = khmer.Hashbits(5, 1000, 4)
 
     try:
-        ht.update(ht2)
+        htableable.update(other_htableable)
         assert 0, "should not be reached"
     except ValueError as err:
         print(str(err))
 
 
 def test_update_from_diff_num_tables():
-    ht = khmer.Hashbits(5, 1000, 3)
-    ht2 = khmer.Hashbits(5, 1000, 4)
+    htableable = khmer.Hashbits(5, 1000, 3)
+    other_htableable = khmer.Hashbits(5, 1000, 4)
 
     try:
-        ht.update(ht2)
+        htableable.update(other_htableable)
         assert 0, "should not be reached"
     except ValueError as err:
         print(str(err))
@@ -112,45 +112,45 @@ def test_update_from_diff_num_tables():
 def test_n_occupied_1():
     filename = utils.get_test_data('random-20-a.fa')
 
-    K = 20  # size of kmer
-    HT_SIZE = 100000  # size of hashtable
-    N_HT = 1  # number of hashtables
+    ksize = 20  # size of kmer
+    htable_size = 100000  # size of hashtable
+    num_htableables = 1  # number of hashtables
 
     # test modified c++ n_occupied code
-    ht1 = khmer.Hashbits(K, HT_SIZE, N_HT)
+    htableable = khmer.Hashbits(ksize, htable_size, num_htableables)
 
-    for n, record in enumerate(fasta_iter(open(filename))):
-        ht1.consume(record['sequence'])
+    for _, record in enumerate(fasta_iter(open(filename))):
+        htableable.consume(record['sequence'])
 
     # this number calculated independently
-    assert ht1.n_occupied() == 3884, ht1.n_occupied()
+    assert htableable.n_occupied() == 3884, htableable.n_occupied()
 
 
 def test_bloom_python_1():
     # test python code to count unique kmers using bloom filter
     filename = utils.get_test_data('random-20-a.fa')
 
-    K = 20  # size of kmer
-    HT_SIZE = 100000  # size of hashtable
-    N_HT = 3  # number of hashtables
+    ksize = 20  # size of kmer
+    htable_size = 100000  # size of hashtable
+    num_htableables = 3  # number of hashtables
 
-    ht2 = khmer.Hashbits(K, HT_SIZE, N_HT)
+    htableable = khmer.Hashbits(ksize, htable_size, num_htableables)
 
     n_unique = 0
-    for n, record in enumerate(fasta_iter(open(filename))):
+    for _, record in enumerate(fasta_iter(open(filename))):
         sequence = record['sequence']
         seq_len = len(sequence)
-        for n in range(0, seq_len + 1 - K):
-            kmer = sequence[n:n + K]
-            if (not ht2.get(kmer)):
+        for n in range(0, seq_len + 1 - ksize):
+            kmer = sequence[n:n + ksize]
+            if not htableable.get(kmer):
                 n_unique += 1
-            ht2.count(kmer)
+            htableable.count(kmer)
 
     assert n_unique == 3960
-    assert ht2.n_occupied() == 3885, ht2.n_occupied()
+    assert htableable.n_occupied() == 3885, htableable.n_occupied()
 
     # this number equals n_unique
-    assert ht2.n_unique_kmers() == 3960, ht2.n_unique_kmers()
+    assert htableable.n_unique_kmers() == 3960, htableable.n_unique_kmers()
 
 
 def test_bloom_c_1():
@@ -158,74 +158,76 @@ def test_bloom_c_1():
 
     filename = utils.get_test_data('random-20-a.fa')
 
-    K = 20  # size of kmer
-    HT_SIZE = 100000  # size of hashtable
-    N_HT = 3  # number of hashtables
+    ksize = 20  # size of kmer
+    htable_size = 100000  # size of hashtable
+    num_htableables = 3  # number of hashtables
 
-    ht3 = khmer.Hashbits(K, HT_SIZE, N_HT)
+    htableable = khmer.Hashbits(ksize, htable_size, num_htableables)
 
-    for n, record in enumerate(fasta_iter(open(filename))):
-        ht3.consume(record['sequence'])
+    for _, record in enumerate(fasta_iter(open(filename))):
+        htableable.consume(record['sequence'])
 
-    assert ht3.n_occupied() == 3885
-    assert ht3.n_unique_kmers() == 3960
+    assert htableable.n_occupied() == 3885
+    assert htableable.n_unique_kmers() == 3960
 
 
 def test_n_occupied_2():  # simple one
-    K = 4
-    HT_SIZE = 10  # use 11
-    N_HT = 1
+    ksize = 4
+    htable_size = 10  # use 11
+    num_htableables = 1
 
-    ht1 = khmer._Hashbits(K, [11])
-    ht1.count('AAAA')  # 00 00 00 00 = 0
-    assert ht1.n_occupied() == 1
+    htableable = khmer._Hashbits(ksize, [11])
+    htableable.count('AAAA')  # 00 00 00 00 = 0
+    assert htableable.n_occupied() == 1
 
-    ht1.count('ACTG')  # 00 10 01 11 =
-    assert ht1.n_occupied() == 2
+    htableable.count('ACTG')  # 00 10 01 11 =
+    assert htableable.n_occupied() == 2
 
-    ht1.count('AACG')  # 00 00 10 11 = 11  # collision 1
+    htableable.count('AACG')  # 00 00 10 11 = 11  # collision 1
 
-    assert ht1.n_occupied() == 2
-    ht1.count('AGAC')   # 00  11 00 10 # collision 2
-    assert ht1.n_occupied() == 2, ht1.n_occupied()
+    assert htableable.n_occupied() == 2
+    htableable.count('AGAC')   # 00  11 00 10 # collision 2
+    assert htableable.n_occupied() == 2, htableable.n_occupied()
 
 
 def test_bloom_c_2():  # simple one
-    K = 4
-
-    # use only 1 hashtable, no bloom filter
-    ht1 = khmer._Hashbits(K, [11])
-    ht1.count('AAAA')  # 00 00 00 00 = 0
-    ht1.count('ACTG')  # 00 10 01 11 =
-    assert ht1.n_unique_kmers() == 2
-    ht1.count('AACG')  # 00 00 10 11 = 11  # collision  with 1st kmer
-    assert ht1.n_unique_kmers() == 2
-    ht1.count('AGAC')   # 00  11 00 10 # collision  with 2nd kmer
-    assert ht1.n_unique_kmers() == 2
-
-    # use two hashtables with 11,13
-    ht2 = khmer._Hashbits(K, [11, 13])
-    ht2.count('AAAA')  # 00 00 00 00 = 0
-
-    ht2.count('ACTG')  # 00 10 01 11 = 2*16 +4 +3 = 39
-    assert ht2.n_unique_kmers() == 2
-    ht2.count('AACG')  # 00 00 10 11 = 11  # collision with only 1st kmer
-    assert ht2.n_unique_kmers() == 3
-    ht2.count('AGAC')   # 00  11 00 10  3*16 +2 = 50
+    ksize = 4
+
+    # use only 1 hashtable, no bloom filter
+    htableable = khmer._Hashbits(ksize, [11])
+    htableable.count('AAAA')  # 00 00 00 00 = 0
+    htableable.count('ACTG')  # 00 10 01 11 =
+    assert htableable.n_unique_kmers() == 2
+    htableable.count('AACG')  # 00 00 10 11 = 11  # collision  with 1st kmer
+    assert htableable.n_unique_kmers() == 2
+    htableable.count('AGAC')   # 00  11 00 10 # collision  with 2nd kmer
+    assert htableable.n_unique_kmers() == 2
+
+    # use two hashtables with 11,13
+    other_htableable = khmer._Hashbits(ksize, [11, 13])
+    other_htableable.count('AAAA')  # 00 00 00 00 = 0
+
+    other_htableable.count('ACTG')  # 00 10 01 11 = 2*16 +4 +3 = 39
+    assert other_htableable.n_unique_kmers() == 2
+    # 00 00 10 11 = 11  # collision with only 1st kmer
+    other_htableable.count('AACG')
+    assert other_htableable.n_unique_kmers() == 3
+    other_htableable.count('AGAC')
+    # 00  11 00 10  3*16 +2 = 50
     # collision with both 2nd and 3rd kmers
 
-    assert ht2.n_unique_kmers() == 3
+    assert other_htableable.n_unique_kmers() == 3
 
 
 def test_filter_if_present():
-    ht = khmer._Hashbits(32, [3, 5])
+    htable = khmer._Hashbits(32, [3, 5])
 
     maskfile = utils.get_test_data('filter-test-A.fa')
     inputfile = utils.get_test_data('filter-test-B.fa')
     outfile = utils.get_temp_filename('filter')
 
-    ht.consume_fasta(maskfile)
-    ht.filter_if_present(inputfile, outfile)
+    htable.consume_fasta(maskfile)
+    htable.filter_if_present(inputfile, outfile)
 
     records = list(fasta_iter(open(outfile)))
     assert len(records) == 1
@@ -234,95 +236,95 @@ def test_filter_if_present():
 
 def test_combine_pe():
     inpfile = utils.get_test_data('combine_parts_1.fa')
-    ht = khmer._Hashbits(32, [1])
+    htable = khmer._Hashbits(32, [1])
 
-    ht.consume_partitioned_fasta(inpfile)
-    assert ht.count_partitions() == (2, 0)
+    htable.consume_partitioned_fasta(inpfile)
+    assert htable.count_partitions() == (2, 0)
 
-    s1 = "CATGCAGAAGTTCCGCAACCATACCGTTCAGT"
-    pid1 = ht.get_partition_id(s1)
+    first_seq = "CATGCAGAAGTTCCGCAACCATACCGTTCAGT"
+    pid1 = htable.get_partition_id(first_seq)
 
-    s2 = "CAAATGTACATGCACTTAAAATCATCCAGCCG"
-    pid2 = ht.get_partition_id(s2)
+    second_seq = "CAAATGTACATGCACTTAAAATCATCCAGCCG"
+    pid2 = htable.get_partition_id(second_seq)
 
     assert pid1 == 2
     assert pid2 == 80293
 
-    ht.join_partitions(pid1, pid2)
+    htable.join_partitions(pid1, pid2)
 
-    pid1 = ht.get_partition_id(s1)
-    pid2 = ht.get_partition_id(s2)
+    pid1 = htable.get_partition_id(first_seq)
+    pid2 = htable.get_partition_id(second_seq)
 
     assert pid1 == pid2
-    assert ht.count_partitions() == (1, 0)
+    assert htable.count_partitions() == (1, 0)
 
 
 def test_load_partitioned():
     inpfile = utils.get_test_data('combine_parts_1.fa')
-    ht = khmer._Hashbits(32, [1])
+    htable = khmer._Hashbits(32, [1])
 
-    ht.consume_partitioned_fasta(inpfile)
-    assert ht.count_partitions() == (2, 0)
+    htable.consume_partitioned_fasta(inpfile)
+    assert htable.count_partitions() == (2, 0)
 
-    s1 = "CATGCAGAAGTTCCGCAACCATACCGTTCAGT"
-    assert ht.get(s1)
+    first_seq = "CATGCAGAAGTTCCGCAACCATACCGTTCAGT"
+    assert htable.get(first_seq)
 
-    s2 = "CAAATGTACATGCACTTAAAATCATCCAGCCG"
-    assert ht.get(s2)
+    second_seq = "CAAATGTACATGCACTTAAAATCATCCAGCCG"
+    assert htable.get(second_seq)
 
-    s3 = "CATGCAGAAGTTCCGCAACCATACCGTTCAGTTCCTGGTGGCTA"[-32:]
-    assert ht.get(s3)
+    third_s = "CATGCAGAAGTTCCGCAACCATACCGTTCAGTTCCTGGTGGCTA"[-32:]
+    assert htable.get(third_s)
 
 
 def test_count_within_radius_simple():
     inpfile = utils.get_test_data('all-A.fa')
-    ht = khmer._Hashbits(4, [3, 5])
+    htable = khmer._Hashbits(4, [3, 5])
 
-    print(ht.consume_fasta(inpfile))
-    n = ht.count_kmers_within_radius('AAAA', 1)
+    print(htable.consume_fasta(inpfile))
+    n = htable.count_kmers_within_radius('AAAA', 1)
     assert n == 1
 
-    n = ht.count_kmers_within_radius('AAAA', 10)
+    n = htable.count_kmers_within_radius('AAAA', 10)
     assert n == 1
 
 
 def test_count_within_radius_big():
     inpfile = utils.get_test_data('random-20-a.fa')
-    ht = khmer.Hashbits(20, 1e5, 4)
+    htable = khmer.Hashbits(20, 1e5, 4)
 
-    ht.consume_fasta(inpfile)
-    n = ht.count_kmers_within_radius('CGCAGGCTGGATTCTAGAGG', int(1e6))
+    htable.consume_fasta(inpfile)
+    n = htable.count_kmers_within_radius('CGCAGGCTGGATTCTAGAGG', int(1e6))
     assert n == 3961, n
 
-    ht = khmer.Hashbits(21, 1e5, 4)
-    ht.consume_fasta(inpfile)
-    n = ht.count_kmers_within_radius('CGCAGGCTGGATTCTAGAGGC', int(1e6))
+    htable = khmer.Hashbits(21, 1e5, 4)
+    htable.consume_fasta(inpfile)
+    n = htable.count_kmers_within_radius('CGCAGGCTGGATTCTAGAGGC', int(1e6))
     assert n == 39
 
 
 def test_count_kmer_degree():
     inpfile = utils.get_test_data('all-A.fa')
-    ht = khmer._Hashbits(4, [3, 5])
-    ht.consume_fasta(inpfile)
+    htable = khmer._Hashbits(4, [3, 5])
+    htable.consume_fasta(inpfile)
 
-    assert ht.kmer_degree('AAAA') == 2
-    assert ht.kmer_degree('AAAT') == 1
-    assert ht.kmer_degree('AATA') == 0
-    assert ht.kmer_degree('TAAA') == 1
+    assert htable.kmer_degree('AAAA') == 2
+    assert htable.kmer_degree('AAAT') == 1
+    assert htable.kmer_degree('AATA') == 0
+    assert htable.kmer_degree('TAAA') == 1
 
 
 def test_save_load_tagset():
-    ht = khmer._Hashbits(32, [1])
+    htable = khmer._Hashbits(32, [1])
 
     outfile = utils.get_temp_filename('tagset')
 
-    ht.add_tag('A' * 32)
-    ht.save_tagset(outfile)
+    htable.add_tag('A' * 32)
+    htable.save_tagset(outfile)
 
-    ht.add_tag('G' * 32)
+    htable.add_tag('G' * 32)
 
-    ht.load_tagset(outfile)              # implicitly => clear_tags=True
-    ht.save_tagset(outfile)
+    htable.load_tagset(outfile)              # implicitly => clear_tags=True
+    htable.save_tagset(outfile)
 
     # if tags have been cleared, then the new tagfile will be larger (34 bytes)
     # else smaller (26 bytes).
@@ -334,17 +336,17 @@ def test_save_load_tagset():
 
 
 def test_save_load_tagset_noclear():
-    ht = khmer._Hashbits(32, [1])
+    htable = khmer._Hashbits(32, [1])
 
     outfile = utils.get_temp_filename('tagset')
 
-    ht.add_tag('A' * 32)
-    ht.save_tagset(outfile)
+    htable.add_tag('A' * 32)
+    htable.save_tagset(outfile)
 
-    ht.add_tag('G' * 32)
+    htable.add_tag('G' * 32)
 
-    ht.load_tagset(outfile, False)       # set clear_tags => False; zero tags
-    ht.save_tagset(outfile)
+    htable.load_tagset(outfile, False)  # set clear_tags => False; zero tags
+    htable.save_tagset(outfile)
 
     # if tags have been cleared, then the new tagfile will be large (34 bytes);
     # else small (26 bytes).
@@ -358,88 +360,89 @@ def test_save_load_tagset_noclear():
 def test_stop_traverse():
     filename = utils.get_test_data('random-20-a.fa')
 
-    K = 20  # size of kmer
-    HT_SIZE = 1e4  # size of hashtable
-    N_HT = 3  # number of hashtables
+    ksize = 20  # size of kmer
+    htable_size = 1e4  # size of hashtable
+    num_htables = 3  # number of hashtables
 
-    ht = khmer.Hashbits(K, HT_SIZE, N_HT)
+    htable = khmer.Hashbits(ksize, htable_size, num_htables)
 
     # without tagging/joining across consume, this breaks into two partitions;
     # with, it is one partition.
-    ht.add_stop_tag('TTGCATACGTTGAGCCAGCG')
+    htable.add_stop_tag('TTGCATACGTTGAGCCAGCG')
 
-    ht.consume_fasta_and_tag(filename)   # DO NOT join reads across stoptags
-    subset = ht.do_subset_partition(0, 0, True)
-    ht.merge_subset(subset)
+    # DO NOT join reads across stoptags
+    htable.consume_fasta_and_tag(filename)
+    subset = htable.do_subset_partition(0, 0, True)
+    htable.merge_subset(subset)
 
-    n, _ = ht.count_partitions()
+    n, _ = htable.count_partitions()
     assert n == 2, n
 
 
 def test_tag_across_stoptraverse():
     filename = utils.get_test_data('random-20-a.fa')
 
-    K = 20  # size of kmer
-    HT_SIZE = 1e4  # size of hashtable
-    N_HT = 3  # number of hashtables
+    ksize = 20  # size of kmer
+    htable_size = 1e4  # size of hashtable
+    num_htables = 3  # number of hashtables
 
-    ht = khmer.Hashbits(K, HT_SIZE, N_HT)
+    htable = khmer.Hashbits(ksize, htable_size, num_htables)
 
     # without tagging/joining across consume, this breaks into two partitions;
     # with, it is one partition.
-    ht.add_stop_tag('CCGAATATATAACAGCGACG')
+    htable.add_stop_tag('CCGAATATATAACAGCGACG')
 
-    ht.consume_fasta_and_tag_with_stoptags(filename)  # DO join reads across
-
-    subset = ht.do_subset_partition(0, 0)
-    n, _ = ht.count_partitions()
+    # DO join reads across
+    htable.consume_fasta_and_tag_with_stoptags(filename)
+    subset = htable.do_subset_partition(0, 0)
+    n, _ = htable.count_partitions()
     assert n == 99                       # reads only connected by traversal...
 
-    n, _ = ht.subset_count_partitions(subset)
+    n, _ = htable.subset_count_partitions(subset)
     assert n == 2                        # but need main to cross stoptags.
 
-    ht.merge_subset(subset)
+    htable.merge_subset(subset)
 
-    n, _ = ht.count_partitions()         # ta-da!
+    n, _ = htable.count_partitions()         # ta-da!
     assert n == 1, n
 
 
 def test_notag_across_stoptraverse():
     filename = utils.get_test_data('random-20-a.fa')
 
-    K = 20  # size of kmer
-    HT_SIZE = 1e4  # size of hashtable
-    N_HT = 3  # number of hashtables
+    ksize = 20  # size of kmer
+    htable_size = 1e4  # size of hashtable
+    num_htables = 3  # number of hashtables
 
-    ht = khmer.Hashbits(K, HT_SIZE, N_HT)
+    htable = khmer.Hashbits(ksize, htable_size, num_htables)
 
     # connecting k-mer at the beginning/end of a read: breaks up into two.
-    ht.add_stop_tag('TTGCATACGTTGAGCCAGCG')
+    htable.add_stop_tag('TTGCATACGTTGAGCCAGCG')
 
-    ht.consume_fasta_and_tag_with_stoptags(filename)
+    htable.consume_fasta_and_tag_with_stoptags(filename)
 
-    subset = ht.do_subset_partition(0, 0)
-    ht.merge_subset(subset)
+    subset = htable.do_subset_partition(0, 0)
+    htable.merge_subset(subset)
 
-    n, _ = ht.count_partitions()
+    n, _ = htable.count_partitions()
     assert n == 2, n
 
 
 def test_find_stoptags():
-    ht = khmer._Hashbits(5, [1])
-    ht.add_stop_tag("AAAAA")
+    htable = khmer._Hashbits(5, [1])
+    htable.add_stop_tag("AAAAA")
 
-    assert ht.identify_stoptags_by_position("AAAAA") == [0]
-    assert ht.identify_stoptags_by_position("AAAAAA") == [0, 1]
-    assert ht.identify_stoptags_by_position("TTTTT") == [0]
-    assert ht.identify_stoptags_by_position("TTTTTT") == [0, 1]
+    assert htable.identify_stoptags_by_position("AAAAA") == [0]
+    assert htable.identify_stoptags_by_position("AAAAAA") == [0, 1]
+    assert htable.identify_stoptags_by_position("TTTTT") == [0]
+    assert htable.identify_stoptags_by_position("TTTTTT") == [0, 1]
 
 
-def test_find_stoptags2():
-    ht = khmer._Hashbits(4, [1])
-    ht.add_stop_tag("ATGC")
+def test_find_stoptags_2():
+    htable = khmer._Hashbits(4, [1])
+    htable.add_stop_tag("ATGC")
 
-    x = ht.identify_stoptags_by_position("ATGCATGCGCAT")
+    x = htable.identify_stoptags_by_position("ATGCATGCGCAT")
     assert x == [0, 2, 4, 8], x
 
 
@@ -450,7 +453,10 @@ def test_get_ksize():
 
 def test_get_hashsizes():
     kh = khmer.Hashbits(22, 100, 4)
-    assert kh.hashsizes() == [97L, 89L, 83L, 79L], kh.hashsizes()
+    # Py2/3 hack: longify converts the values to long on Py2; remove once
+    # Py2 is no longer supported.
+    expected = utils.longify([97, 89, 83, 79])
+    assert kh.hashsizes() == expected, kh.hashsizes()
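+    # utils.longify is not shown in this diff; presumably it coerces each
+    # value to a Py2 long (so the list compares equal to hashsizes(), which
+    # returns [97L, 89L, 83L, 79L] on Py2) and is a no-op on Py3.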
 
 
 def test_extract_unique_paths_0():
@@ -510,21 +516,21 @@ def test_find_unpart():
     filename = utils.get_test_data('random-20-a.odd.fa')
     filename2 = utils.get_test_data('random-20-a.even.fa')
 
-    K = 20  # size of kmer
-    HT_SIZE = 1e4  # size of hashtable
-    N_HT = 3  # number of hashtables
+    ksize = 20  # size of kmer
+    htable_size = 1e4  # size of hashtable
+    num_htables = 3  # number of hashtables
 
-    ht = khmer.Hashbits(K, HT_SIZE, N_HT)
-    ht.consume_fasta_and_tag(filename)
+    htable = khmer.Hashbits(ksize, htable_size, num_htables)
+    htable.consume_fasta_and_tag(filename)
 
-    subset = ht.do_subset_partition(0, 0)
-    ht.merge_subset(subset)
+    subset = htable.do_subset_partition(0, 0)
+    htable.merge_subset(subset)
 
-    n, _ = ht.count_partitions()
+    n, _ = htable.count_partitions()
     assert n == 49
 
-    ht.find_unpart(filename2, True, False)
-    n, _ = ht.count_partitions()
+    htable.find_unpart(filename2, True, False)
+    n, _ = htable.count_partitions()
     assert n == 1, n                     # all sequences connect
 
 
@@ -532,21 +538,21 @@ def test_find_unpart_notraverse():
     filename = utils.get_test_data('random-20-a.odd.fa')
     filename2 = utils.get_test_data('random-20-a.even.fa')
 
-    K = 20  # size of kmer
-    HT_SIZE = 1e4  # size of hashtable
-    N_HT = 3  # number of hashtables
+    ksize = 20  # size of kmer
+    htable_size = 1e4  # size of hashtable
+    num_htables = 3  # number of hashtables
 
-    ht = khmer.Hashbits(K, HT_SIZE, N_HT)
-    ht.consume_fasta_and_tag(filename)
+    htable = khmer.Hashbits(ksize, htable_size, num_htables)
+    htable.consume_fasta_and_tag(filename)
 
-    subset = ht.do_subset_partition(0, 0)
-    ht.merge_subset(subset)
+    subset = htable.do_subset_partition(0, 0)
+    htable.merge_subset(subset)
 
-    n, _ = ht.count_partitions()
+    n, _ = htable.count_partitions()
     assert n == 49
 
-    ht.find_unpart(filename2, False, False)     # <-- don't traverse
-    n, _ = ht.count_partitions()
+    htable.find_unpart(filename2, False, False)     # <-- don't traverse
+    n, _ = htable.count_partitions()
     assert n == 99, n                    # all sequences disconnected
 
 
@@ -554,21 +560,21 @@ def test_find_unpart_fail():
     filename = utils.get_test_data('random-20-a.odd.fa')
     filename2 = utils.get_test_data('random-20-a.odd.fa')  # <- switch to odd
 
-    K = 20  # size of kmer
-    HT_SIZE = 1e4  # size of hashtable
-    N_HT = 3  # number of hashtables
+    ksize = 20  # size of kmer
+    htable_size = 1e4  # size of hashtable
+    num_htables = 3  # number of hashtables
 
-    ht = khmer.Hashbits(K, HT_SIZE, N_HT)
-    ht.consume_fasta_and_tag(filename)
+    htable = khmer.Hashbits(ksize, htable_size, num_htables)
+    htable.consume_fasta_and_tag(filename)
 
-    subset = ht.do_subset_partition(0, 0)
-    ht.merge_subset(subset)
+    subset = htable.do_subset_partition(0, 0)
+    htable.merge_subset(subset)
 
-    n, _ = ht.count_partitions()
+    n, _ = htable.count_partitions()
     assert n == 49
 
-    ht.find_unpart(filename2, True, False)
-    n, _ = ht.count_partitions()
+    htable.find_unpart(filename2, True, False)
+    n, _ = htable.count_partitions()
     assert n == 49, n                    # only 49 sequences worth of tags
 
 
@@ -617,13 +623,13 @@ def test_badget():
 
 
 def test_load_notexist_should_fail():
     savepath = utils.get_temp_filename('temphashbitssave0.ht')
 
     hi = khmer._CountingHash(12, [1])
     try:
         hi.load(savepath)
         assert 0, "load should fail"
-    except IOError:
+    except OSError:
         pass
 
 
@@ -648,29 +654,29 @@ def test_load_truncated_should_fail():
     try:
         hi.load(savepath)
         assert 0, "load should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
 def test_save_load_tagset_notexist():
-    ht = khmer._Hashbits(32, [1])
+    htable = khmer._Hashbits(32, [1])
 
     outfile = utils.get_temp_filename('tagset')
     try:
-        ht.load_tagset(outfile)
+        htable.load_tagset(outfile)
         assert 0, "this test should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
 def test_save_load_tagset_trunc():
-    ht = khmer._Hashbits(32, [1])
+    htable = khmer._Hashbits(32, [1])
 
     outfile = utils.get_temp_filename('tagset')
 
-    ht.add_tag('A' * 32)
-    ht.add_tag('G' * 32)
-    ht.save_tagset(outfile)
+    htable.add_tag('A' * 32)
+    htable.add_tag('G' * 32)
+    htable.save_tagset(outfile)
 
     # truncate tagset file...
     fp = open(outfile, 'rb')
@@ -684,11 +690,17 @@ def test_save_load_tagset_trunc():
 
         # try loading it...
         try:
-            ht.load_tagset(outfile)
+            htable.load_tagset(outfile)
             assert 0, "this test should fail"
-        except IOError as err:
+        except OSError as err:
             print(str(err), i)
 
+    # try loading it...
+    try:
+        htable.load_tagset(outfile)
+        assert 0, "this test should fail"
+    except OSError:
+        pass
 
 # to build the test files used below, add 'test' to this function
 # and then look in /tmp. You will need to tweak the version info in
@@ -701,48 +713,48 @@ def _build_testfiles():
     inpath = utils.get_test_data('random-20-a.fa')
     hi = khmer.Hashbits(12, 2)
     hi.consume_fasta(inpath)
     hi.save('/tmp/goodversion-k12.ht')
 
     # tagset file
 
-    ht = khmer._Hashbits(32, [1])
+    htable = khmer._Hashbits(32, [1])
 
-    ht.add_tag('A' * 32)
-    ht.add_tag('G' * 32)
-    ht.save_tagset('/tmp/goodversion-k32.tagset')
+    htable.add_tag('A' * 32)
+    htable.add_tag('G' * 32)
+    htable.save_tagset('/tmp/goodversion-k32.tagset')
 
     # stoptags file
 
     fakelump_fa = utils.get_test_data('fakelump.fa')
 
-    ht = khmer.Hashbits(32, 4, 4)
-    ht.consume_fasta_and_tag(fakelump_fa)
+    htable = khmer.Hashbits(32, 4, 4)
+    htable.consume_fasta_and_tag(fakelump_fa)
 
-    subset = ht.do_subset_partition(0, 0)
-    ht.merge_subset(subset)
+    subset = htable.do_subset_partition(0, 0)
+    htable.merge_subset(subset)
 
     EXCURSION_DISTANCE = 40
     EXCURSION_KMER_THRESHOLD = 82
     EXCURSION_KMER_COUNT_THRESHOLD = 1
     counting = khmer.CountingHash(32, 4, 4)
 
-    ht.repartition_largest_partition(None, counting,
-                                     EXCURSION_DISTANCE,
-                                     EXCURSION_KMER_THRESHOLD,
-                                     EXCURSION_KMER_COUNT_THRESHOLD)
+    htable.repartition_largest_partition(None, counting,
+                                         EXCURSION_DISTANCE,
+                                         EXCURSION_KMER_THRESHOLD,
+                                         EXCURSION_KMER_COUNT_THRESHOLD)
 
-    ht.save_stop_tags('/tmp/goodversion-k32.stoptags')
+    htable.save_stop_tags('/tmp/goodversion-k32.stoptags')
 
 
 def test_hashbits_file_version_check():
-    ht = khmer._Hashbits(12, [1])
+    htable = khmer._Hashbits(12, [1])
 
     inpath = utils.get_test_data('badversion-k12.ht')
 
     try:
-        ht.load(inpath)
+        htable.load(inpath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
@@ -751,63 +763,63 @@ def test_hashbits_file_type_check():
     savepath = utils.get_temp_filename('tempcountingsave0.ct')
     kh.save(savepath)
 
-    ht = khmer._Hashbits(12, [1])
+    htable = khmer._Hashbits(12, [1])
 
     try:
-        ht.load(savepath)
+        htable.load(savepath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
 def test_stoptags_file_version_check():
-    ht = khmer._Hashbits(32, [1])
+    htable = khmer._Hashbits(32, [1])
 
     inpath = utils.get_test_data('badversion-k32.stoptags')
 
     try:
-        ht.load_stop_tags(inpath)
+        htable.load_stop_tags(inpath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
 def test_stoptags_ksize_check():
-    ht = khmer._Hashbits(31, [1])
+    htable = khmer._Hashbits(31, [1])
 
     inpath = utils.get_test_data('goodversion-k32.stoptags')
     try:
-        ht.load_stop_tags(inpath)
+        htable.load_stop_tags(inpath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
 def test_stop_tags_filetype_check():
-    ht = khmer._Hashbits(31, [1])
+    htable = khmer._Hashbits(31, [1])
 
     inpath = utils.get_test_data('goodversion-k32.tagset')
     try:
-        ht.load_stop_tags(inpath)
+        htable.load_stop_tags(inpath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
 def test_tagset_file_version_check():
-    ht = khmer._Hashbits(32, [1])
+    htable = khmer._Hashbits(32, [1])
 
     inpath = utils.get_test_data('badversion-k32.tagset')
 
     try:
-        ht.load_tagset(inpath)
+        htable.load_tagset(inpath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
 def test_stop_tags_truncate_check():
-    ht = khmer._Hashbits(32, [1])
+    htable = khmer._Hashbits(32, [1])
 
     inpath = utils.get_test_data('goodversion-k32.tagset')
     data = open(inpath, 'rb').read()
@@ -819,31 +831,31 @@ def test_stop_tags_truncate_check():
         fp.close()
 
         try:
-            ht.load_stop_tags(truncpath)
+            htable.load_stop_tags(truncpath)
             assert 0, "expect failure of previous command"
-        except IOError as e:
+        except OSError as e:
             print(i, str(e))
 
 
 def test_tagset_ksize_check():
-    ht = khmer._Hashbits(31, [1])
+    htable = khmer._Hashbits(31, [1])
 
     inpath = utils.get_test_data('goodversion-k32.tagset')
     try:
-        ht.load_tagset(inpath)
+        htable.load_tagset(inpath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
 def test_tagset_filetype_check():
-    ht = khmer._Hashbits(31, [1])
+    htable = khmer._Hashbits(31, [1])
 
     inpath = utils.get_test_data('goodversion-k32.stoptags')
     try:
-        ht.load_tagset(inpath)
+        htable.load_tagset(inpath)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
 
 
@@ -866,7 +878,7 @@ def test_consume_absentfasta_with_reads_parser():
         readparser = ReadParser(utils.get_test_data('empty-file'))
         presencetable.consume_fasta_with_reads_parser(readparser)
         assert 0, "this should fail"
-    except IOError as err:
+    except OSError as err:
         print(str(err))
     except ValueError as err:
         print(str(err))
@@ -887,7 +899,7 @@ def test_consume_fasta_and_tag_with_badreads_parser():
         readsparser = khmer.ReadParser(utils.get_test_data("test-empty.fa"))
         presencetable.consume_fasta_and_tag_with_reads_parser(readsparser)
         assert 0, "this should fail"
-    except IOError as e:
+    except OSError as e:
         print(str(e))
     except ValueError as e:
         print(str(e))
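
A note on the IOError -> OSError substitution that runs through all of these
test files: per PEP 3151, IOError has been a plain alias of OSError since
Python 3.3, so catching OSError is the more general spelling and still
catches everything the old handlers did. A quick check, valid on Python 3.3+:

    assert IOError is OSError   # PEP 3151 unified the OS exception hierarchy
    try:
        open('/nonexistent/path')
    except OSError as err:      # also catches what was formerly IOError
        print(err)
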
diff --git a/tests/test_hll.py b/tests/test_hll.py
index ebde12d..297374a 100644
--- a/tests/test_hll.py
+++ b/tests/test_hll.py
@@ -74,7 +74,7 @@ def test_hll_consume_string():
 def test_hll_empty_fasta():
     filename = utils.get_test_data('test-empty.fa')
     hll = khmer.HLLCounter(ERR_RATE, K)
-    with assert_raises(IOError):
+    with assert_raises(OSError):
         hll.consume_fasta(filename)
 
 
diff --git a/tests/test_labelhash.py b/tests/test_labelhash.py
index 59a56b0..c3567cd 100644
--- a/tests/test_labelhash.py
+++ b/tests/test_labelhash.py
@@ -106,7 +106,7 @@ def test_get_label_dict_save_load_wrong_ksize():
     try:
         lb.load_labels_and_tags(savepath)
         assert 0, "this should not succeed - different ksize"
-    except IOError as err:
+    except OSError as err:
         print(str(err))
         assert "Incorrect k-mer size 19" in str(err)
 
@@ -136,7 +136,7 @@ def test_save_load_corrupted():
         try:
             lb.load_labels_and_tags(truncated)
             assert 0, "this should not succeed -- truncated file len %d" % (i,)
-        except IOError as err:
+        except OSError as err:
             print('expected failure for', i, ': ', str(err))
 
 
@@ -155,7 +155,7 @@ def test_save_fail_readonly():
     try:
         lb_pre.save_labels_and_tags(savepath)
         assert 0, "this should fail: read-only file"
-    except IOError as err:
+    except OSError as err:
         print(str(err))
 
 
@@ -410,7 +410,7 @@ def test_load_wrong_filetype():
     try:
         lb.load_labels_and_tags(filename)
         assert 0, "this should not succeed - bad file type"
-    except IOError as err:
+    except OSError as err:
         print(str(err))
         assert "Incorrect file format type" in str(err)
 
@@ -419,7 +419,7 @@ def test_load_wrong_filetype():
     try:
         lb.load_labels_and_tags(filename)
         assert 0, "this should not succeed - bad file signature"
-    except IOError as err:
+    except OSError as err:
         print(str(err))
         assert "Incorrect file signature" in str(err)
 
@@ -432,6 +432,6 @@ def test_load_wrong_fileversion():
     try:
         lb.load_labels_and_tags(filename)
         assert 0, "this should not succeed - bad file type"
-    except IOError as err:
+    except OSError as err:
         print(str(err))
         assert "Incorrect file format version" in str(err)
diff --git a/tests/test_lump.py b/tests/test_lump.py
index c7eeb0d..511ee00 100644
--- a/tests/test_lump.py
+++ b/tests/test_lump.py
@@ -152,7 +152,7 @@ def test_fakelump_load_stop_tags_trunc():
     try:
         ht.load_stop_tags(fakelump_fa_foo)
         assert 0, "this test should fail"
-    except IOError:
+    except OSError:
         pass
 
 
@@ -165,5 +165,5 @@ def test_fakelump_load_stop_tags_notexist():
     try:
         ht.load_stop_tags(fakelump_fa_foo)
         assert 0, "this test should fail"
-    except IOError:
+    except OSError:
         pass
diff --git a/tests/test_normalize_by_median.py b/tests/test_normalize_by_median.py
index abdcc58..41d22b4 100644
--- a/tests/test_normalize_by_median.py
+++ b/tests/test_normalize_by_median.py
@@ -6,7 +6,7 @@ from __future__ import unicode_literals
 # Copyright (C) Michigan State University, 2009-2015. It is licensed under
 # the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
-#
+# pylint: disable=missing-docstring,invalid-name,unused-variable
 
 import os
 import shutil
@@ -32,6 +32,20 @@ def test_normalize_by_median_indent():
     assert os.path.exists(outfile)
 
 
+def test_normalize_by_median_empty_file():
+    infile = utils.get_temp_filename('empty')
+    shutil.copyfile(utils.get_test_data('empty-file'), infile)
+    script = 'normalize-by-median.py'
+    in_dir = os.path.dirname(infile)
+
+    args = [infile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+
+    assert 'WARNING:' in err, err
+    assert 'is empty' in err, err
+    assert 'SKIPPED' in err, err
+
+
 def test_normalize_by_median():
     CUTOFF = '1'
 
@@ -52,7 +66,7 @@ def test_normalize_by_median():
     seqs = [r.sequence for r in screed.open(outfile)]
     assert len(seqs) == 1, seqs
     assert seqs[0].startswith('GGTTGACGGGGCTCAGGGGG'), seqs
-    assert "IOErrors" not in err
+    assert "I/O Errors" not in err
 
 
 def test_normalize_by_median_unpaired_final_read():
@@ -64,13 +78,50 @@ def test_normalize_by_median_unpaired_final_read():
     shutil.copyfile(utils.get_test_data('single-read.fq'), infile)
 
     script = 'normalize-by-median.py'
-    args = ['-C', CUTOFF, '-k', '17', '-p',  infile]
-    try:
-        (status, out, err) = utils.runscript(script, args, in_dir)
-        raise Exception("Shouldn't get to this")
-    except AssertionError as e:
-        out = str(e)
-        assert "ERROR: Unpaired reads when require_paired" in out, out
+    args = ['-C', CUTOFF, '-k', '17', '-p', infile]
+    (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status != 0
+    assert "ERROR: Unpaired reads when require_paired" in err, err
+
+
+def test_normalize_by_median_sanity_check_0():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('single-read.fq'), infile)
+
+    script = 'normalize-by-median.py'
+    args = ['-U', '1024', '--max-mem', '60', infile]
+    (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status != 0
+    assert "recommended false positive ceiling of 0.1!" in err, err
+
+
+def test_normalize_by_median_sanity_check_1():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-filter-abund-Ns.fq'), infile)
+
+    script = 'normalize-by-median.py'
+    args = ['-U', '83', '--max-tablesize', '17', infile]
+    (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status != 0
+    assert "Warning: The given tablesize is too small!" in err, err
+
+
+def test_normalize_by_median_sanity_check_2():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-filter-abund-Ns.fq'), infile)
+
+    script = 'normalize-by-median.py'
+    args = ['-U', '83', infile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+
+    assert "*** INFO: set memory ceiling using auto optimization." in err, err
+    assert "*** Ceiling is: 399 bytes" in err, err
 
 
 def test_normalize_by_median_unforced_badfile():
@@ -81,12 +132,9 @@ def test_normalize_by_median_unforced_badfile():
     in_dir = os.path.dirname(infile)
     script = 'normalize-by-median.py'
     args = ['-C', CUTOFF, '-k', '17', infile]
-    try:
-        (status, out, err) = utils.runscript(script, args, in_dir)
-        raise Exception("Shouldn't get to this")
-    except AssertionError as e:
-        out = str(e)
-        assert "ERROR: [Errno 2] No such file or directory:" in out, out
+    (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status != 0
+    assert "ERROR: [Errno 2] No such file or directory:" in err, err
 
     if os.path.exists(outfile):
         assert False, '.keep file should have been removed: '
@@ -102,12 +150,9 @@ def test_normalize_by_median_contradictory_args():
     script = 'normalize-by-median.py'
     args = ['-C', '1', '-k', '17', '--force-single', '-p', '-R',
             outfile, infile]
-    try:
-        (status, out, err) = utils.runscript(script, args, in_dir)
-        raise Exception("Shouldn't get to this")
-    except AssertionError as e:
-        out = str(e)
-        assert "cannot both be set" in out, out
+    (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status != 0
+    assert "cannot both be set" in err, err
 
 
 def test_normalize_by_median_stdout_3():
@@ -124,7 +169,7 @@ def test_normalize_by_median_stdout_3():
 
     assert 'Total number of unique k-mers: 98' in err, err
     assert 'in /dev/stdout' in err, err
-    assert "IOErrors" not in err
+    assert "I/O Errors" not in err
 
 
 @attr('known_failing')
@@ -151,8 +196,62 @@ def test_normalize_by_median_known_good():
         assert False
 
 
-@attr('huge')
 def test_normalize_by_median_report_fp():
+    # this tests basic reporting of diginorm stats => report.out, including
+    # a test of aggregate stats for two input files.
+
+    infile = utils.get_temp_filename('test.fa')
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
+    infile2 = utils.get_temp_filename('test2.fa')
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile2)
+
+    in_dir = os.path.dirname(infile)
+    outfile = utils.get_temp_filename('report.out')
+
+    script = 'normalize-by-median.py'
+    args = ['-C', '1', '-k', '17', '-R', outfile, infile, infile2]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+
+    assert os.path.exists(outfile)
+    report = open(outfile, 'r')
+    line = report.readline().strip()
+    assert line == 'total,kept,f_kept', line
+    line = report.readline().strip()
+    assert line == '1001,1,0.000999', line
+    line = report.readline().strip()
+    assert line == '2002,1,0.0004995', line
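+    # arithmetic check on the expected rows: with -C 1 a single read
+    # survives, so f_kept is 1/1001 -> 0.000999 after the first file and,
+    # cumulatively, 1/2002 -> 0.0004995 after the second.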
+
+
+def test_normalize_by_median_report_fp_hifreq():
+    # this tests high-frequency reporting of diginorm stats for a single
+    # file => report.out.
+
+    infile = utils.get_temp_filename('test.fa')
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
+
+    in_dir = os.path.dirname(infile)
+    outfile = utils.get_temp_filename('report.out')
+
+    script = 'normalize-by-median.py'
+    args = ['-C', '1', '-k', '17', '-R', outfile, infile,
+            '--report-frequency', '100']
+    (status, out, err) = utils.runscript(script, args, in_dir)
+
+    assert os.path.exists(outfile)
+    report = open(outfile, 'r')
+    line = report.readline().strip()
+    assert line == 'total,kept,f_kept', line
+    line = report.readline().strip()
+    assert line == '100,1,0.01', line
+    line = report.readline().strip()
+    assert line == '200,1,0.005', line
+
+
+@attr('huge')
+def test_normalize_by_median_report_fp_huge():
+    # this tests reporting of diginorm stats => report.out for a large
+    # file, with the default reporting interval of once every 100k.
+
     infile = utils.get_temp_filename('test.fa')
     in_dir = os.path.dirname(infile)
     outfile = utils.get_temp_filename('report.out')
@@ -165,8 +264,9 @@ def test_normalize_by_median_report_fp():
 
     assert "fp rate estimated to be 0.623" in err, err
     report = open(outfile, 'r')
+    line = report.readline()            # skip header
     line = report.readline()
-    assert "100000 25261 0.25261" in line, line
+    assert "100000,25261,0.2526" in line, line
 
 
 def test_normalize_by_median_unpaired_and_paired():
@@ -203,12 +303,12 @@ def test_normalize_by_median_count_kmers_PE():
     args = ['-C', CUTOFF, '-k', '17', '--force-single', infile]
     (status, out, err) = utils.runscript(script, args, in_dir)
     assert 'Total number of unique k-mers: 98' in err, err
-    assert 'kept 1 of 2 or 50%' in err, err
+    assert 'kept 1 of 2 or 50.0%' in err, err
 
     args = ['-C', CUTOFF, '-k', '17', '-p', infile]
     (status, out, err) = utils.runscript(script, args, in_dir)
     assert 'Total number of unique k-mers: 99' in err, err
-    assert 'kept 2 of 2 or 100%' in err, err
+    assert 'kept 2 of 2 or 100.0%' in err, err
 
 
 def test_normalize_by_median_double_file_name():
@@ -220,10 +320,21 @@ def test_normalize_by_median_double_file_name():
     script = 'normalize-by-median.py'
     args = [utils.get_test_data('test-abund-read-2.fa'), infile]
 
-    try:
-        (status, out, err) = utils.runscript(script, args, in_dir)
-    except AssertionError as e:
-        assert "Duplicate filename--Cannot handle this!" in str(e), str(e)
+    (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status != 0
+    assert "Duplicate filename--Cannot handle this!" in err, err
+
+
+def test_normalize_by_median_stdin_no_out():
+    infile = utils.get_temp_filename('test-abund-read-2.fa')
+    in_dir = os.path.dirname(infile)
+
+    script = 'normalize-by-median.py'
+    args = ["-"]
+
+    (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status != 0
+    assert "Accepting input from stdin; output filename" in err, err
 
 
 def test_normalize_by_median_overwrite():
@@ -326,7 +437,7 @@ def test_normalize_by_median_paired_fq():
     assert seqs[0].startswith('GGTTGACGGGGCTCAGGGGG'), seqs
     assert seqs[1].startswith('GGTTGACGGGGCTCAGGG'), seqs
 
-    names = [r.name for r in screed.open(outfile, parse_description=False)]
+    names = [r.name for r in screed.open(outfile)]
     assert len(names) == 6, names
     assert '895:1:37:17593:9954 1::FOO' in names, names
     assert '895:1:37:17593:9954 2::FOO' in names, names
@@ -342,7 +453,8 @@ def test_normalize_by_median_impaired():
 
     script = 'normalize-by-median.py'
     args = ['-C', CUTOFF, '-p', '-k', '17', infile]
-    _, out, err = utils.runscript(script, args, in_dir, fail_ok=True)
+    status, out, err = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status != 0
     assert 'ERROR: Unpaired reads ' in err, err
 
 
@@ -365,7 +477,7 @@ def test_normalize_by_median_force():
     (status, out, err) = utils.runscript(script, args, in_dir)
 
     assert '*** Skipping' in err
-    assert '** IOErrors' in err
+    assert '** I/O Errors' in err
 
 
 def test_normalize_by_median_no_bigcount():
@@ -417,6 +529,7 @@ def test_normalize_by_median_emptycountingtable():
     script = 'normalize-by-median.py'
     args = ['-C', CUTOFF, '--loadtable', infile, infile]
     (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status != 0
     assert 'ValueError' in err, (status, out, err)
 
 
@@ -431,10 +544,7 @@ def test_normalize_by_median_fpr():
     args = ['-f', '-k 17', '-x ' + str(MAX_TABLESIZE_PARAM), infile]
 
     (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
-
-    print(out)
-    print(err)
-
+    assert status != 0
     assert os.path.exists(infile + '.keep'), infile
     assert '** ERROR: the graph structure is too small' in err, err
 
@@ -450,7 +560,7 @@ def write_by_chunks(infile, outfile, CHUNKSIZE=8192):
     ofile.close()
 
 
-def test_normalize_by_median_streaming():
+def test_normalize_by_median_streaming_0():
     CUTOFF = '20'
 
     infile = utils.get_test_data('100-reads.fq.gz')
@@ -477,6 +587,32 @@ def test_normalize_by_median_streaming():
     assert linecount == 400
 
 
+def test_normalize_by_median_streaming_1():
+    CUTOFF = '20'
+
+    infile = utils.get_test_data('test-filter-abund-Ns.fq')
+    in_dir = os.path.dirname(infile)
+    fifo = utils.get_temp_filename('fifo')
+    outfile = utils.get_temp_filename('outfile')
+
+    # Use a fifo to stream the input into the script from a writer thread
+    os.mkfifo(fifo)
+    thread = threading.Thread(target=write_by_chunks, args=(infile, fifo))
+    thread.start()
+
+    # Execute diginorm
+    script = 'normalize-by-median.py'
+    args = ['-C', CUTOFF, '-k', '17', '-o', outfile, fifo]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+
+    # Wait for the writer thread to finish
+    thread.join()
+
+    assert os.path.exists(outfile), outfile
+    assert 'Total number of unique k-mers: 98' in err, err
+    assert 'fifo is empty' not in err, err
+
+
 def test_diginorm_basic_functionality_1():
     # each of these pairs has both a multicopy sequence ('ACTTCA...') and
     # a random sequence.  With 'C=1' and '-p', all should be kept.
diff --git a/tests/test_oxli_functions.py b/tests/test_oxli_functions.py
index 63ad48c..22e4371 100644
--- a/tests/test_oxli_functions.py
+++ b/tests/test_oxli_functions.py
@@ -16,26 +16,26 @@ from oxli import functions
 
 
 def test_estimate_functions_1():
-    res = functions.estimate_optimal_with_N_and_M(99, 1024)
+    res = functions.estimate_optimal_with_K_and_M(99, 1024)
     assert res[0] == 7, res[0]
     assert res[1] == 146, res[1]
     assert res[2] == 1022, res[2]
     assert abs(.008 - res[3]) < .001, res[3]
 
-    res = functions.estimate_optimal_with_N_and_f(99, 0.00701925498897)
+    res = functions.estimate_optimal_with_K_and_f(99, 0.00701925498897)
     assert res[0] == 7, res[0]
     assert res[1] == 145, res[1]
     assert res[2] == 1015, res[2]
     assert abs(.008 - res[3]) < .002, res[3]
 
-    res = functions.estimate_optimal_with_N_and_M(1024, 2)
+    res = functions.estimate_optimal_with_K_and_M(1024, 2)
     assert res[0] == 1, res[0]
     assert res[1] == 2, res[1]
     assert res[2] == 2, res[2]
     assert res[3] == 1.0, res[3]
 
     # using a crazy high FP rate just for coverage
-    res = functions.estimate_optimal_with_N_and_f(1024, 0.7)
+    res = functions.estimate_optimal_with_K_and_f(1024, 0.7)
     assert res[0] == 1, res[0]
     assert res[1] == 850, res[1]
     assert res[2] == 850, res[2]
@@ -43,18 +43,46 @@ def test_estimate_functions_1():
 
 
 def test_estimate_functions_namedtup():
-    res = functions.estimate_optimal_with_N_and_M(99, 1024)
+    res = functions.estimate_optimal_with_K_and_M(99, 1024)
     assert res.num_htables == 7, res[0]
     assert res.htable_size == 146, res[1]
     assert res.mem_use == 1022, res[2]
     assert abs(.008 - res.fp_rate) < .001, res[3]
 
-    res = functions.estimate_optimal_with_N_and_f(99, 0.00701925498897)
+    res = functions.estimate_optimal_with_K_and_f(99, 0.00701925498897)
     assert res.num_htables == 7, res[0]
     assert res.htable_size == 145, res[1]
     assert res.mem_use == 1015, res[2]
     assert abs(.008 - res.fp_rate) < .002, res[3]
 
 
+def test_optimal_size_function():
+    res = functions.optimal_size(99, mem_cap=1024)
+    assert res.num_htables == 7, res[0]
+    assert res.htable_size == 146, res[1]
+    assert res.mem_use == 1022, res[2]
+    assert abs(.008 - res.fp_rate) < .001, res[3]
+
+    res = functions.optimal_size(99, fp_rate=0.00701925498897)
+    assert res.num_htables == 7, res[0]
+    assert res.htable_size == 145, res[1]
+    assert res.mem_use == 1015, res[2]
+    assert abs(.008 - res.fp_rate) < .002, res[3]
+
+    try:
+        functions.optimal_size(99, mem_cap=1024, fp_rate=0.00701925498897)
+        assert 0, "this should fail"
+    except TypeError as err:
+        print(str(err))
+        assert "num_kmers and either mem_cap or fp_rate" in str(err)
+
+    try:
+        functions.optimal_size(99)
+        assert 0, "this should fail"
+    except TypeError as err:
+        print(str(err))
+        assert "num_kmers and either mem_cap or fp_rate" in str(err)
+
+
 def test_output_gen():
     res = functions.optimal_args_output_gen(99, 0.00701925498897)
diff --git a/tests/test_read_aligner.py b/tests/test_read_aligner.py
index 0fa9eec..adbf24e 100644
--- a/tests/test_read_aligner.py
+++ b/tests/test_read_aligner.py
@@ -1,17 +1,70 @@
+from __future__ import print_function
+from __future__ import absolute_import
 #
 # This file is part of khmer, https://github.com/dib-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2015. It is licensed under
 # the three-clause BSD license; see LICENSE. Contact: ctb at msu.edu
 #
-from __future__ import print_function
 
 import khmer
-from nose.tools import assert_almost_equals
+from . import khmer_tst_utils as utils
+from nose.tools import assert_almost_equals
+
+
+def pretty_compare(a, b):
+    print(len(a), len(b))
+
+    line1 = []
+    line2 = []
+    line3 = []
+    for (x, y) in zip(a, b):
+        line1.append(x)
+        line2.append(y)
+        if x == y:
+            line3.append('|')
+        else:
+            line3.append('x')
+
+    for i in range(0, len(line1), 60):
+        print("".join(line1[i:i + 60]))
+        print("".join(line3[i:i + 60]))
+        print("".join(line2[i:i + 60]))
 
 
-# DISABLING TESTS until model probabilities are finalized
 def eq_(v1, v2):
-    return True
+    assert len(v1)
+    if v1 != v2:
+        pretty_compare(v1, v2)
+    assert v1 == v2, (v1, v2)
+
+
+def neq_(v1, v2):
+    assert len(v1)
+    if v1 == v2:
+        pretty_compare(v1, v2)
+    assert v1 != v2, (v1, v2)
+
+
+def test_graph_attribute():
+    ch = khmer.CountingHash(10, 1048576, 1)
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    assert aligner.graph is ch
+
+
+def test_align_nothing():
+    ch = khmer.CountingHash(10, 1048576, 1)
+    read = "ACCAAGGCTCGAGATTTACC"
+
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(20):
+        ch.consume("AGAGGGAAAGCTAGGTTCGACAAGTCCTTGACAGAT")
+    score, graphAlign, readAlign, trunc = aligner.align(read)
+
+    print(score, graphAlign, readAlign)
+
+    assert trunc
+    assert len(graphAlign) == 0
+    assert len(readAlign) == 0
 
 
 def test_alignnocov():
@@ -26,9 +79,284 @@ def test_alignnocov():
     # should be the same
     eq_(readAlign, 'ACCTAGGTTCGACATGTACC')
     eq_(graphAlign, 'ACCTAGGTTCGACATGTACC')
+    assert not trunc
+
+
+def test_align_middle():
+    ch = khmer.CountingHash(10, 1048576, 1)
+    read = "TCGACAAGTCCTTGACAGAT"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(20):
+        ch.consume("AGAGGGAAAGCTAGGTTCGACAAGTCCTTGACAGAT")
+    ch.consume(read)
+    score, graphAlign, readAlign, trunc = aligner.align(read)
+
+    # should be the same
+    eq_(readAlign, read)
+    eq_(graphAlign, read)
+    assert not trunc
+
+
+def test_align_middle_trunc():
+    return  # @CTB
+
+    ch = khmer.CountingHash(10, 1048576, 1)
+    read = "TCGACAAGTCCTTGACAGATGGGGGG"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(20):
+        ch.consume("AGAGGGAAAGCTAGGTTCGACAAGTCCTTGACAGAT")
+
+    # omit suffix from graph
+    ch.consume(read[:-5])
+    score, graphAlign, readAlign, trunc = aligner.align(read)
+
+    # should not be the same...
+    neq_(readAlign, read)
+    neq_(graphAlign, read)
+
+    eq_(readAlign, read[:-5])
+    eq_(graphAlign, read[:-5])
+
+    # ...but truncated
+    assert trunc
+
+
+def test_align_middle_trunc_2():
+    return  # @CTB
+
+    ch = khmer.CountingHash(10, 1048576, 1)
+    read = "GGGGGGGGGGGGTCGACAAGTCCTTGACAGAT"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(20):
+        ch.consume("AAAAAAAAAAAATCGACAAGTCCTTGACAGAT")
+
+    # omit prefix from graph
+    ch.consume(read[12:])
+    score, graphAlign, readAlign, trunc = aligner.align(read)
+
+    # here, the alignment must start not at the beginning
+    print(readAlign)
+    print(graphAlign)
+
+    eq_(readAlign, read[12:])
+    eq_(graphAlign, read[12:])
+
+    # ...but truncated
+    assert trunc
+
+
+def test_align_fwd_nothing():
+    ch = khmer.CountingHash(10, 1048576, 1)
+    read = "ACCAAGGCTCGAGATTTACC"
+
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(20):
+        ch.consume("AGAGGGAAAGCTAGGTTCGACAAGTCCTTGACAGAT")
+    score, graphAlign, readAlign, trunc, _ = aligner.align_forward(read)
+
+    print(score, graphAlign, readAlign)
+
+    assert trunc
+    assert len(graphAlign) == 0
+    assert len(readAlign) == 0
+
+
+def test_align_fwd_nocov():
+    ch = khmer.CountingHash(10, 1048576, 1)
+    read = "ACCTAGGTTCGACATGTACC"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(20):
+        ch.consume("AGAGGGAAAGCTAGGTTCGACAAGTCCTTGACAGAT")
+    ch.consume("ACCTAGGTTCGACATGTACC")
+    score, graphAlign, readAlign, trunc, _ = aligner.align_forward(read)
+
+    # should be the same
+    eq_(readAlign, 'ACCTAGGTTCGACATGTACC')
+    eq_(graphAlign, 'ACCTAGGTTCGACATGTACC')
+    assert not trunc
+
+
+def test_align_fwd_middle():
+    ch = khmer.CountingHash(10, 1048576, 1)
+    read = "TCGACAAGTCCTTGACAGAT"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(20):
+        ch.consume("AGAGGGAAAGCTAGGTTCGACAAGTCCTTGACAGAT")
+    ch.consume(read)
+    score, graphAlign, readAlign, trunc, _ = aligner.align_forward(read)
+
+    # should be the same
+    eq_(readAlign, read)
+    eq_(graphAlign, read)
+    assert not trunc
+
+
+def test_align_fwd_middle_trunc():
+    return  # @CTB
+    ch = khmer.CountingHash(10, 1048576, 1)
+    read = "TCGACAAGTCCTTGACAGATGGGGGG"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(20):
+        ch.consume("AGAGGGAAAGCTAGGTTCGACAAGTCCTTGACAGAT")
+
+    # omit suffix from graph
+    ch.consume(read[:-5])
+    score, graphAlign, readAlign, trunc, _ = aligner.align_forward(read)
+
+    # should not be the same...
+    neq_(readAlign, read)
+    neq_(graphAlign, read)
+
+    eq_(readAlign, read[:-5])
+    eq_(graphAlign, read[:-5])
+
+    # ...but truncated
+    assert trunc
+
+
+def test_align_fwd_middle_trunc_2():
+    ch = khmer.CountingHash(10, 1048576, 1)
+    read = "GGGGGGGGGGGGTCGACAAGTCCTTGACAGAT"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(20):
+        ch.consume("AAAAAAAAAAAATCGACAAGTCCTTGACAGAT")
+
+    # omit prefix from graph
+    ch.consume(read[12:])
+    score, graphAlign, readAlign, trunc, _ = aligner.align_forward(read)
+
+    # this will fail, because align_forward chooses the first kmer as the
+    # seed.
+    assert not readAlign
+    assert not graphAlign
+    assert trunc
+
+
+def test_align_fwd_covs_1():
+    K = 10
+
+    ch = khmer.CountingHash(K, 1048576, 1)
+    read = "GTCGACAAGTCCTTGACAGAT"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(19):
+        ch.consume(read)
+
+    ch.consume("CTCGACAAGTCCTTGACAGAT")
+    #           ^
+    score, g, r, is_t, covs = aligner.align_forward(read)
+
+    for start in range(0, len(read) - K + 1):
+        print(ch.get(read[start:start + K]), end=' ')
+    print('')
+
+    assert len(covs) == len(read)
+    assert covs[0] == 19
+    assert min(covs[1:-K]) == 20, covs
+    assert max(covs) == 20, covs
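+    # reading the covs assertions: covs[i] is the coverage of the k-mer
+    # starting at read position i (padded out to len(read) entries), so
+    # only the k-mer containing the mutated base 0 drops to 19 while
+    # every other k-mer keeps the full coverage of 20.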
+
+
+def test_align_fwd_covs_2():
+    K = 10
+
+    ch = khmer.CountingHash(K, 1048576, 1)
+    read = "GTCGACAAGTCCTTGACAGAT"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(19):
+        ch.consume(read)
+
+    ch.consume("GACGACAAGTCCTTGACAGAT")
+    #            ^
+    score, g, r, is_t, covs = aligner.align_forward(read)
+
+    print(covs, g)
+    for start in range(0, len(read) - K + 1):
+        print(ch.get(read[start:start + K]), end=' ')
+    print('')
+
+    assert len(covs) == len(read)
+    assert covs[0] == 19
+    assert covs[1] == 19
+    assert min(covs[2:-K]) == 20, covs
+    assert max(covs) == 20, covs
+
+
+def test_align_fwd_covs_3():
+    K = 10
+
+    ch = khmer.CountingHash(K, 1048576, 1)
+    read = "GTCGACAAGTCCTTGACAGAT"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(19):
+        ch.consume(read)
+
+    ch.consume("GTAGACAAGTCCTTGACAGAT")
+    #             ^
+    score, g, r, is_t, covs = aligner.align_forward(read)
+
+    print(covs, g)
+    for start in range(0, len(read) - K + 1):
+        print(ch.get(read[start:start + K]), end=' ')
+    print('')
+
+    assert len(covs) == len(read)
+    assert covs[0] == 19
+    assert covs[1] == 19
+    assert covs[2] == 19
+    assert min(covs[3:-K]) == 20, covs
+    assert max(covs) == 20, covs
+
+
+def test_align_fwd_covs_4():
+    K = 10
+
+    ch = khmer.CountingHash(K, 1048576, 1)
+    read = "GTCGACAAGTCCTTGACAGAT"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(19):
+        ch.consume(read)
+
+    ch.consume("GTCGACAAGTCCTTGACAGAG")
+    #                               ^
+    score, g, r, is_t, covs = aligner.align_forward(read)
+
+    print(covs, g)
+    for start in range(0, len(read) - K + 1):
+        print(ch.get(read[start:start + K]), end=' ')
+    print('')
+
+    assert len(covs) == len(read)
+    assert covs[-K] == 19
+    assert min(covs[:-K]) == 20, covs
+    assert max(covs) == 20, covs
+
+
+def test_align_fwd_covs_5():
+    K = 10
+
+    ch = khmer.CountingHash(K, 1048576, 1)
+    read = "GTCGACAAGTCCTTGACAGAT"
+    aligner = khmer.ReadAligner(ch, 0, 0)
+    for i in range(19):
+        ch.consume(read)
+
+    ch.consume("GTCGACAAGTCCTTGACAGCT")
+    #                              ^
+    score, g, r, is_t, covs = aligner.align_forward(read)
+
+    print(covs, g)
+    for start in range(0, len(read) - K + 1):
+        print(ch.get(read[start:start + K]), end=' ')
+    print('')
+
+    assert len(covs) == len(read)
+    assert covs[-K] == 19
+    assert covs[-K - 1] == 19
+    assert min(covs[:-K - 1]) == 20, covs
+    assert max(covs) == 20, covs
 
 
 def test_simple_readalign():
+    return  # @CTB
     ch = khmer.CountingHash(10, 1048576, 1)
     aligner = khmer.ReadAligner(ch, 2, 0)
     for i in range(20):
@@ -43,11 +371,12 @@ def test_simple_readalign():
 #                        AGCTAGGTTCGACAAGT CCT
 #                        ACCTAGGTTCGACAAGTaCC
 #                        --CTAGGTTCGACATGT-CC
-    eq_(graphAlign, 'AGCTAGGTTCGACATGTCC-')
-    eq_(readAlign, 'ACCTAGGTTCGACAAGTACc')
+    eq_(graphAlign, 'AGCTAGGTTCGACATGTCCT')
+    eq_(readAlign, 'ACCTAGGTTCGACAAGTACC')
 
 
 def test_readalign():
+    return  # @CTB
     ch = khmer.CountingHash(10, 1048576, 1)
     aligner = khmer.ReadAligner(ch, 1, 0)
     for i in range(20):
@@ -59,8 +388,8 @@ def test_readalign():
 
     score, graphAlign, readAlign, trunc = aligner.align(read)
 
-    eq_(readAlign, 'ACCTAGGTTCGACATGTACc')
-    eq_(graphAlign, 'AGCTAGGTTCGACAAGTCC-')
+    eq_(readAlign, 'ACCTAGGTTCGACATGTACC')
+    eq_(graphAlign, 'AGCTAGGTTCGACAAGTCCT')
 
 
 ht_seqs = ["TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAACTGG"
@@ -81,7 +410,7 @@ queries = [
         "seq": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAA"
         "CTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTGTTGCAATCTTAACAA"
         "CCTCTTTAC",
-        "score": 278.376028204,
+        "score": 274.76338282696173,
         "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCG"
         "CTTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTGTTGCAATCT"
         "TAACAACCTCTTTAC",
@@ -94,9 +423,9 @@ queries = [
         "seq": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAA"
         "CTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTATTGCAATCTTAACAA"
         "CCTCTTTAC",
-        "score": 271.753976385,
+        "score": 274.76338282696173,
         "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCG"
-        "CTTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTGTTGCAATCT"
+        "CTTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTATTGCAATCT"
         "TAACAACCTCTTTAC",
         "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGC"
         "TTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTATTGCAATCTT"
@@ -107,7 +436,7 @@ queries = [
         "seq": "TAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAAC"
         "TGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTGTTGCAATCTTAACAAC"
         "CTCTTTAC",
-        "score": 276.416710585,
+        "score": 272.841515695261,
         "graph_aln": "TAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGC"
         "TTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTGTTGCAATCTT"
         "AACAACCTCTTTAC",
@@ -120,7 +449,7 @@ queries = [
         "seq": "TAAATGCGCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAAC"
         "TGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTGTTGCAATCTTAACAAC"
         "CTCTTTAC",
-        "score": 269.794658765,
+        "score": 268.2640868672253,
         "graph_aln": "TAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGC"
         "TTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTGTTGCAATCTT"
         "AACAACCTCTTTAC",
@@ -131,42 +460,42 @@ queries = [
     },
     {
         "seq": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAA",
-        "score": 97.5386525659,
+        "score": 97.37145206396536,
         "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAA",
         "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAA",
         "truncated": False
     },
     {
         "seq": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTAGATGTTTGATTATCAA",
-        "score": 90.9166007464,
+        "score": 92.79402323592961,
         "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAA",
         "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTAGATGTTTGATTATCAA",
         "truncated": False
     },
     {
         "seq": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTATTGATTATCAA",
-        "score": 92.9385894977,
+        "score": 84.74620322710143,
         "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGT-TTGATTATCAA",
         "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTaTTGATTATCAA",
         "truncated": False
     },
     {
         "seq": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATTGTTTGATTATCAA",
-        "score": 84.3383420486,
-        "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATAtGTTTGATTATCAA",
-        "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATT-GTTTGATTATCAA",
+        "score": 82.2182409986759,
+        "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATaTGTTTGATTATCAA",
+        "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTAT-TGTTTGATTATCAA",
         "truncated": False
     },
     {
         "seq": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTATTGATTATCAA",
-        "score": 92.9385894977,
+        "score": 84.74620322710143,
         "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGT-TTGATTATCAA",
         "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTaTTGATTATCAA",
         "truncated": False
     },
     {
         "seq": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTATAGATTATCAA",
-        "score": 86.3165376783,
+        "score": 80.1687743990657,
         "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGT-TTGATTATCAA",
         "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTaTAGATTATCAA",
         "truncated": False
@@ -175,11 +504,11 @@ queries = [
         "seq": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATAATTTTGCCGCTTTAAC"
         "TGGGTCTAGTTTCTACTGCAAACTTTCCACCAACTAGTTTTTCTGCATCCTTTGTTGCAATCTTAACAA"
         "CCTCTTTAC",
-        "score": 236.115256507,
-        "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAaTT-TtGCC"
+        "score": 237.81111469018322,
+        "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATcAATTTTGCC"
         "GCTTTAACTGGGTCT-GTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTGTTGCAAT"
         "CTTAACAACCTCTTTAC",
-        "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATAA-TTtT-GCCG"
+        "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTAT-AATTTTGCCG"
         "CTTTAACTGGGTCTaGTTTCTACTGCAAACTTTCCACCAACTAGTTTTTCTGCATCCTTTGTTGCAATC"
         "TTAACAACCTCTTTAC",
         "truncated": False
@@ -187,38 +516,135 @@ queries = [
     {
         "seq": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGAAAATAATTAAAAAAAAAAAAA"
         "AAAAAAAAAAAAAAAAAAAAAAAAAA",
-        "score": 44.7543247314,
-        "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATatgtt",
-        "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTAT-----",
-        "truncated": True
+        "score": 5.331560863368736,
+        "graph_aln":
+        "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAACTGGGTC"
+        "TGTTTCTACTGCAAACTTT",
+        "read_aln":
+        "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGAAAATAATTAAAAAAAAAAAAAAAAAAAA"
+        "AAAAAAAAAAAAAAAAAAA",
+        "truncated": False
     },
     {
         "seq": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAA"
         "CTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGAAAAATGTCATCCTGTATTGCAATCTTAACAA"
         "CCTCTTTAC",
-        "score": 227.446444943,
+        "score": 274.76338282696173,
         "graph_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCG"
-        "CTTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTtTTTCTG-CATCCTGTGTTGCAATC"
+        "CTTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGAAAAATGTCATCCTGTATTGCAATC"
         "TTAACAACCTCTTTAC",
         "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGC"
-        "TTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGA-AAAATGtCATCCTGTATTGCAATCT"
+        "TTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGAAAAATGTCATCCTGTATTGCAATCT"
         "TAACAACCTCTTTAC",
         "truncated": False
+    },
+    {  # the 32-base motif is an identical match to the HT seqs; the rest
+        # is random.  "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTAT" is from ht_seqs
+        "seq":
+        "ACAAGGCCATTTGTTCGCATTCTGAAGCCGGCTTCCACCATGGTACTGGGAAACTGTCGGAATATTAAA"
+        "TGCCCAATTTTTCCCTCTTTTCTTCTATCCGCAGTATGGACACTGTTTTCCTGAATTTCATTGACAGTT"
+        "TAATTTACTGCGGTCACGCGGAACT",
+        "score": 68.17022311739733,
+        "graph_aln":
+        "ACAAGGCCATTTGTTCGCATTCTGAAGCCGGCTTCCACCATGGTACTGGGAAACTGTCGGAATATTAAA"
+        "TGCCCAATTTTTCCCTCTTTTCTTCTATCCGCAGTATGGACACTGTTTTCCTGAATTTCATTGACAGTT"
+        "TAATTTACTGCGGTCACGCGGAACT",
+        "read_aln": "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTAT",
+        "truncated": True,
+        "description": "truncated-alignment-bc-missing-kmers"
+    },
+    {   # Testing for min distance between correctable SNPs
+        # 1st SNP is at position 2+K from beginning, 2nd SNP at position 2+K+K
+        "seq":
+        "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATACGTTTGATTATCAATTTTGCCGCTTTAACTGG"
+        "ATCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTATTGCAATCTTAACAAC"
+        "CTCTTTAC",
+        "score": 265.608525171,
+        "graph_aln":
+        "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAACTGG"
+        "GTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTATTGCAATCTTAACAAC"
+        "CTCTTTAC",
+        "read_aln":
+        "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATACGTTTGATTATCAATTTTGCCGCTTTAACTGG"
+        "ATCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTATTGCAATCTTAACAAC"
+        "CTCTTTAC",
+        "truncated": False,
+        "description": "2 SNPs, one K apart",
+    },
+    {   # Testing for min distance between correctable SNPs
+        # 1st SNP is at position 2+K from beginning, 2nd SNP at position
+        # 2+K+K-1
+        "seq":
+        "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATACCTTTGATTATCAATTTTGCCGCTTTAACTGG"
+        "GTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTATTGCAATCTTAACAAC"
+        "CTCTTTAC",
+        "score": 265.608525171,
+        "graph_aln":
+        "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAACTGG"
+        "GTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTATTGCAATCTTAACAAC"
+        "CTCTTTAC",
+        "read_aln":
+        "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATACGTTTGATTATCAATTTTGCCGCTTTAACTAG"
+        "GTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTATTGCAATCTTAACAAC"
+        "CTCTTTAC",
+        "truncated": False,
+        "description": "2 SNPs, K-2 apart",
     }
+
 ]
 
 
+def check_query(aligner, query):
+    score, graphAlign, readAlign, trunc = aligner.align(query["seq"])
+    print(query["seq"])
+    print(graphAlign, query["graph_aln"])
+    print(readAlign, query["read_aln"])
+    print(trunc, query["truncated"])
+    print(score, query["score"])
+    assert graphAlign == query["graph_aln"], "\n%r != \n%r" % \
+        (graphAlign, query["graph_aln"])
+    assert readAlign == query["read_aln"], "\n%r != \n%r" % \
+        (readAlign, query["read_aln"])
+    eq_(trunc, query["truncated"])
+    if query["score"] > 0:
+        assert_almost_equals(score, query["score"])
+
+
 def test_readalign_new():
+    return  # @CTB
     ch = khmer.CountingHash(32, 1048576, 1)
     aligner = khmer.ReadAligner(ch, 1, 0)
     for seq in ht_seqs:
         ch.consume(seq)
 
     for query in queries:
-        score, graphAlign, readAlign, trunc = aligner.align(query["seq"])
-        print(graphAlign)
-        print(readAlign)
-        eq_(graphAlign, query["graph_aln"])
-        eq_(readAlign, query["read_aln"])
-        eq_(trunc, query["truncated"])
-        # assert_almost_equals(score, query["score"])
+        if "description" in query:
+            check_query.description = query["description"]
+        yield check_query, aligner, query
+
+
+def test_readaligner_load():
+    ct = khmer.CountingHash(32, 1048576, 1)
+    parameters_json = utils.get_test_data('readaligner-default.json')
+    a_aligner = khmer.ReadAligner(ct, 0, 0, filename=parameters_json)
+    a_scoring_matrix = a_aligner.get_scoring_matrix()
+    a_transition_probabilities = a_aligner.get_transition_probabilities()
+    assert a_scoring_matrix[0] == -0.06642736173897607, a_scoring_matrix[0]
+    assert a_transition_probabilities[0][0] == -0.021973842014145723, (
+        a_transition_probabilities[0][0])
+
+    for seq in ht_seqs:
+        ct.consume(seq)
+
+    for query in queries:
+        a_aligner.align(query['seq'])
+
+    b_aligner = khmer.ReadAligner(
+        ct, 0, 0, transition_probabilities=a_transition_probabilities,
+        scoring_matrix=a_scoring_matrix)
+    b_scoring_matrix = b_aligner.get_scoring_matrix()
+    b_transition_probabilities = b_aligner.get_transition_probabilities()
+    assert b_scoring_matrix == a_scoring_matrix, (
+        a_scoring_matrix, b_scoring_matrix)
+    assert b_transition_probabilities == a_transition_probabilities, (
+        a_transition_probabilities, b_transition_probabilities)
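
The rewritten aligner tests above use nose's generator-test idiom:
test_readalign_new() yields (check_query, aligner, query) tuples, nose runs
each yielded tuple as its own test case, and an optional .description
attribute on the check function replaces the generated test name in verbose
output. A minimal self-contained sketch of the idiom, with illustrative data
that is not from khmer:

    from nose.tools import eq_

    cases = [
        {"seq": "ACGT", "length": 4, "description": "short-sequence"},
        {"seq": "ACGTACGT", "length": 8},
    ]

    def check_length(case):
        # one assertion per yielded case; nose reports each separately
        eq_(len(case["seq"]), case["length"])

    def test_lengths():
        for case in cases:
            if "description" in case:
                # shown by nose in place of the generated test name
                check_length.description = case["description"]
            yield check_length, case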
diff --git a/tests/test_read_parsers.py b/tests/test_read_parsers.py
index c785772..c55d17a 100644
--- a/tests/test_read_parsers.py
+++ b/tests/test_read_parsers.py
@@ -87,7 +87,7 @@ def test_num_reads_truncated():
     try:
         for read in rparser:
             n_reads += 1
-    except IOError as err:
+    except ValueError as err:
         assert "Sequence is empty" in str(err), str(err)
     assert rparser.num_reads == 1, "%d valid reads in file, got %d" % (
         n_reads, rparser.num_reads)
@@ -109,7 +109,7 @@ def test_gzip_decompression_truncated():
         for read in rparser:
             pass
         assert 0, "this should fail"
-    except IOError as err:
+    except OSError as err:
         print(str(err))
 
 
@@ -120,7 +120,9 @@ def test_gzip_decompression_truncated_pairiter():
         for read in rparser.iter_read_pairs():
             pass
         assert 0, "this should fail"
-    except IOError as err:
+    except OSError as err:
+        print(str(err))
+    except ValueError as err:
         print(str(err))
 
 
@@ -141,7 +143,9 @@ def test_bzip2_decompression_truncated():
         for read in rparser:
             pass
         assert 0, "this should fail"
-    except IOError as err:
+    except OSError as err:
+        print(str(err))
+    except ValueError as err:
         print(str(err))
 
 
@@ -152,7 +156,9 @@ def test_bzip2_decompression_truncated_pairiter():
         for read in rparser.iter_read_pairs():
             pass
         assert 0, "this should fail"
-    except IOError as err:
+    except OSError as err:
+        print(str(err))
+    except ValueError as err:
         print(str(err))
 
 
@@ -162,7 +168,7 @@ def test_badbzip2():
         for read in rparser:
             pass
         assert 0, "this should fail"
-    except IOError as err:
+    except OSError as err:
         print(str(err))
     except ValueError as err:
         print(str(err))
@@ -269,7 +275,7 @@ def test_read_truncated():
         for read in rparser:
             pass
         assert 0, "No exception raised on a truncated file"
-    except IOError as err:
+    except ValueError as err:
         assert "Sequence is empty" in str(err), str(err)
 
 
@@ -317,6 +323,7 @@ def test_read_pair_iterator_in_error_mode():
     assert all(matches)  # Assert ALL the matches. :-]
 
 
+@attr('linux')
 def test_read_pair_iterator_in_error_mode_xfail():
 
     rparser = \
@@ -327,7 +334,22 @@ def test_read_pair_iterator_in_error_mode_xfail():
         for rpair in rparser.iter_read_pairs():
             pass
         failed = False
-    except IOError as exc:
+    except ValueError as exc:
+        assert "Invalid read pair" in str(exc), str(exc)
+    assert failed
+
+
+def test_read_pair_iterator_in_error_mode_xfail_osxsafe():
+
+    rparser = \
+        ReadParser(utils.get_test_data("test-abund-read-impaired.fa"))
+
+    failed = True
+    try:
+        for rpair in rparser.iter_read_pairs():
+            pass
+        failed = False
+    except ValueError as exc:
         pass
     assert failed
 
@@ -361,6 +383,8 @@ def test_constructor():
         assert 0, "ReadParser shouldn't accept a non-existant file name"
     except ValueError as err:
         print(str(err))
+    except OSError as err:
+        print(str(err))
 
 
 def test_iternext():
@@ -370,7 +394,7 @@ def test_iternext():
         for read_1, read_2 in rparser.iter_read_pairs():
             read_pairs.append(read_1, read_2)
         assert 0, "Shouldn't be able to iterate over non FASTA file"
-    except IOError as err:
+    except OSError as err:
         print(str(err))
     except ValueError as err:
         print(str(err))
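
The blanket IOError -> OSError/ValueError substitutions in this file track
Python 3's reworked exception hierarchy: since PEP 3151 (Python 3.3), IOError
is a plain alias of OSError, and the parser now signals malformed records with
ValueError rather than an I/O error. A minimal sketch of the catch pattern the
updated tests expect, with parse_records standing in (hypothetically) for
khmer's ReadParser:

    def consume(parse_records, path):
        # parse_records is a hypothetical stand-in for ReadParser
        try:
            for _record in parse_records(path):
                pass
        except OSError as err:     # missing file, truncated gzip/bzip2 stream
            print("I/O problem:", err)
        except ValueError as err:  # malformed record, e.g. "Sequence is empty"
            print("bad record:", err)

    # On Python 3.3+, IOError is a plain alias, so an OSError handler
    # also catches anything raised as IOError:
    assert IOError is OSError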
diff --git a/tests/test_sandbox_scripts.py b/tests/test_sandbox_scripts.py
index ef85a82..c968e5a 100644
--- a/tests/test_sandbox_scripts.py
+++ b/tests/test_sandbox_scripts.py
@@ -1,5 +1,3 @@
-from __future__ import print_function
-from __future__ import absolute_import
 #
 # This file is part of khmer, https://github.com/dib-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2015. It is licensed under
@@ -9,6 +7,10 @@ from __future__ import absolute_import
 
 # pylint: disable=C0111,C0103,E1103,W0612
 
+from __future__ import print_function
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
 import sys
 import os
 import os.path
@@ -23,6 +25,7 @@ import imp
 from . import khmer_tst_utils as utils
 import khmer
 import screed
+from .test_scripts import _make_counting
 
 
 def scriptpath(script):
@@ -100,7 +103,7 @@ def test_sweep_reads():
             contigfile, readfile, 'junkfile.fa']
 
     status, out, err = utils.runscript(
-        script, args, in_dir, fail_ok=True, sandbox=True)
+        script, args, in_dir, sandbox=True)
 
     # check if the bad file was skipped without issue
     assert 'ERROR' in err, err
@@ -146,7 +149,7 @@ def test_sweep_reads_fq():
             contigfile, readfile, 'junkfile.fa']
 
     status, out, err = utils.runscript(
-        script, args, in_dir, fail_ok=True, sandbox=True)
+        script, args, in_dir, sandbox=True)
 
     # check if the bad file was skipped without issue
     assert 'ERROR' in err, err
@@ -255,3 +258,32 @@ def test_saturate_by_median():
     status, out, err = utils.runscript(script, args, sandbox=True)
 
     assert status == 0
+
+
+def test_count_kmers_1():
+    infile = utils.get_temp_filename('input.fa')
+    shutil.copyfile(utils.get_test_data('random-20-a.fa'), infile)
+    ctfile = _make_counting(infile)
+
+    script = scriptpath('count-kmers.py')
+    args = [ctfile, infile]
+
+    status, out, err = utils.runscript(script, args, os.path.dirname(infile),
+                                       sandbox=True)
+
+    out = out.splitlines()
+    assert 'TTGTAACCTGTGTGGGGTCG,1' in out
+
+
+def test_count_kmers_2_single():
+    infile = utils.get_temp_filename('input.fa')
+    shutil.copyfile(utils.get_test_data('random-20-a.fa'), infile)
+
+    script = scriptpath('count-kmers-single.py')
+    args = ['-x', '1e7', '-k', '20', '-N', '2', infile]
+
+    status, out, err = utils.runscript(script, args, os.path.dirname(infile),
+                                       sandbox=True)
+
+    out = out.splitlines()
+    assert 'TTGTAACCTGTGTGGGGTCG,1' in out
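
The count-kmers tests above lean on _make_counting, now imported from
tests/test_scripts.py, to build a counting table on disk before the sandbox
script runs. A hedged sketch of what such a helper plausibly does, expressed
with the utilities already used in these tests (the actual implementation may
differ):

    from . import khmer_tst_utils as utils

    def _make_counting(infilename, SIZE=1e7, N=2, K=20):
        # Build a .ct counting table from infilename via the public script
        # and return its path; the parameter names here are assumptions.
        outname = utils.get_temp_filename('table.ct')
        script = 'load-into-counting.py'
        args = ['-x', str(SIZE), '-N', str(N), '-k', str(K),
                outname, infilename]
        utils.runscript(script, args)
        return outname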
diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py
index e7cdc8d..25c6b01 100644
--- a/tests/test_script_arguments.py
+++ b/tests/test_script_arguments.py
@@ -18,7 +18,17 @@ from . import khmer_tst_utils as utils
 import argparse
 import khmer.kfile
 from khmer import khmer_args
-from cStringIO import StringIO
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO
+
+import sys
+
+
+# For map(long, [list of ints]) cross-version hackery
+if sys.version_info.major > 2:
+    long = int
 
 
 def test_check_space():
@@ -36,14 +46,16 @@ def test_check_space():
 
 
 def test_check_tablespace():
+    outfile = utils.get_test_data('truncated.fq')
     save_stderr, sys.stderr = sys.stderr, io.StringIO()
 
     parser = khmer_args.build_counting_args()
     args = parser.parse_args(['-M', '1e9'])
 
     try:
-        khmer.kfile.check_space_for_hashtable(args, 'countgraph', force=False,
-                                              _testhook_free_space=0)
+        tablesize = khmer_args.calculate_tablesize(args, 'countgraph')
+        khmer.kfile.check_space_for_hashtable(outfile, tablesize,
+                                              False, _testhook_free_space=0)
         assert 0, "this should fail"
     except SystemExit as e:
         print(str(e))
@@ -68,12 +80,15 @@ def test_check_space_force():
 def test_check_tablespace_force():
     save_stderr, sys.stderr = sys.stderr, io.StringIO()
 
+    outfile = utils.get_test_data('truncated')
+
     parser = khmer_args.build_counting_args()
     args = parser.parse_args(['-M', '1e9'])
 
     try:
-        khmer.kfile.check_space_for_hashtable(args, 'countgraph', True,
-                                              _testhook_free_space=0)
+        tablesize = khmer_args.calculate_tablesize(args, 'countgraph')
+        khmer.kfile.check_space_for_hashtable(outfile, tablesize,
+                                              True, _testhook_free_space=0)
         assert True, "this should pass"
     except SystemExit as e:
         print(str(e))
@@ -93,6 +108,18 @@ def test_invalid_file_warn():
         sys.stderr = save_stderr
 
 
+def test_check_valid_stdin_nowarn():
+    save_stderr, sys.stderr = sys.stderr, io.StringIO()
+    try:
+        khmer.kfile.check_valid_file_exists(["-"])
+        err = sys.stderr.getvalue()
+        assert err.count("\n") == 0, err
+    except SystemExit as e:
+        print(str(e))
+    finally:
+        sys.stderr = save_stderr
+
+
 FakeArgparseObject = collections.namedtuple('FakeArgs',
                                             ['ksize', 'n_tables',
                                              'max_tablesize',
@@ -108,7 +135,8 @@ def test_create_countgraph_1():
     args = FakeArgparseObject(ksize, n_tables, max_tablesize, max_mem)
 
     countgraph = khmer_args.create_countgraph(args)
-    assert countgraph.hashsizes() == [2499997L, 2499989L, 2499983L, 2499967L]
+    expected_hashsz = utils.longify([2499997, 2499989, 2499983, 2499967])
+    assert countgraph.hashsizes() == expected_hashsz, countgraph.hashsizes()
     assert sum(countgraph.hashsizes()) < max_mem, sum(countgraph.hashsizes())
 
 
@@ -171,10 +199,11 @@ def test_create_nodegraph_1():
     args = FakeArgparseObject(ksize, n_tables, max_tablesize, max_mem)
 
     nodegraph = khmer_args.create_nodegraph(args)
-    assert nodegraph.hashsizes() == [19999999L, 19999981L,
-                                     19999963L, 19999927L]
+    expected_hashsz = utils.longify([19999999, 19999981, 19999963, 19999927])
+    assert nodegraph.hashsizes() == expected_hashsz, nodegraph.hashsizes()
 
-    assert sum(nodegraph.hashsizes())/8.0 < max_mem, sum(nodegraph.hashsizes())
+    assert sum(nodegraph.hashsizes()) / \
+        8.0 < max_mem, sum(nodegraph.hashsizes())
 
 
 def test_create_nodegraph_2():
@@ -221,7 +250,7 @@ def test_create_nodegraph_4_multiplier():
     args = FakeArgparseObject(ksize, n_tables, max_tablesize, max_mem)
 
     nodegraph = khmer_args.create_nodegraph(args, multiplier=2.0)
-    assert sum(nodegraph.hashsizes())/8.0 < max_mem / 2.0, \
+    assert sum(nodegraph.hashsizes()) / 8.0 < max_mem / 2.0, \
         sum(nodegraph.hashsizes())
 
 
@@ -236,9 +265,7 @@ def test_report_on_config_bad_hashtype():
     try:
         khmer_args.report_on_config(args, 'foograph')
         assert 0, "the previous statement should raise an exception"
-    except AssertionError:
-        raise
-    except Exception as err:
+    except ValueError as err:
         assert "unknown graph type: foograph" in str(err), str(err)
 
 
@@ -253,9 +280,7 @@ def test_fail_calculate_foograph_size():
     args = FakeArgparseObject(ksize, n_tables, max_tablesize, max_mem)
 
     try:
-        nodegraph = khmer_args._calculate_tablesize(args, 'foograph')
+        nodegraph = khmer_args.calculate_tablesize(args, 'foograph')
         assert 0, "previous statement should fail"
-    except AssertionError:
-        raise
-    except Exception as err:
+    except ValueError as err:
         assert "unknown graph type: foograph" in str(err), str(err)
diff --git a/tests/test_scripts.py b/tests/test_scripts.py
index ffbbb81..ace3d89 100644
--- a/tests/test_scripts.py
+++ b/tests/test_scripts.py
@@ -1,6 +1,3 @@
-from __future__ import print_function
-from __future__ import absolute_import
-from __future__ import unicode_literals
 #
 # This file is part of khmer, https://github.com/dib-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2015. It is licensed under
@@ -8,6 +5,10 @@ from __future__ import unicode_literals
 # Contact: khmer-project at idyll.org
 #
 
+from __future__ import print_function
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
 # pylint: disable=C0111,C0103,E1103,W0612
 
 import json
@@ -18,7 +19,6 @@ import shutil
 from io import StringIO
 import traceback
 from nose.plugins.attrib import attr
-import subprocess
 import threading
 import bz2
 import io
@@ -42,7 +42,7 @@ def test_check_space():
 
 def test_load_into_counting():
     script = 'load-into-counting.py'
-    args = ['-x', '1e3', '-N', '2', '-k', '20', '-t']
+    args = ['-x', '1e3', '-N', '2', '-k', '20']
 
     outfile = utils.get_temp_filename('out.ct')
     infile = utils.get_test_data('test-abund-read-2.fa')
@@ -56,7 +56,7 @@ def test_load_into_counting():
 
 def test_load_into_counting_tablesize_warning():
     script = 'load-into-counting.py'
-    args = ['-k', '20', '-t']
+    args = ['-k', '20']
 
     outfile = utils.get_temp_filename('out.ct')
     infile = utils.get_test_data('test-abund-read-2.fa')
@@ -70,7 +70,7 @@ def test_load_into_counting_tablesize_warning():
 
 def test_load_into_counting_max_memory_usage_parameter():
     script = 'load-into-counting.py'
-    args = ['-M', '2e3', '-k', '20', '-t']
+    args = ['-M', '2e3', '-k', '20']
 
     outfile = utils.get_temp_filename('out.ct')
     infile = utils.get_test_data('test-abund-read-2.fa')
@@ -87,7 +87,7 @@ def test_load_into_counting_max_memory_usage_parameter():
 
 def test_load_into_counting_abundance_dist_nobig():
     script = 'load-into-counting.py'
-    args = ['-x', '1e3', '-N', '2', '-k', '20', '-t', '-b']
+    args = ['-x', '1e3', '-N', '2', '-k', '20', '-b']
 
     outfile = utils.get_temp_filename('out.ct')
     infile = utils.get_test_data('test-abund-read-2.fa')
@@ -109,7 +109,7 @@ def test_load_into_counting_abundance_dist_nobig():
 
 def test_load_into_counting_nonwritable():
     script = 'load-into-counting.py'
-    args = ['-x', '1e3', '-N', '2', '-k', '20', '-t']
+    args = ['-x', '1e3', '-N', '2', '-k', '20']
 
     outfile = utils.get_temp_filename('test-nonwritable')
     with open(outfile, 'w') as fout:
@@ -128,7 +128,7 @@ def test_load_into_counting_nonwritable():
 @attr('huge')
 def test_load_into_counting_toobig():
     script = 'load-into-counting.py'
-    args = ['-x', '1e12', '-N', '2', '-k', '20', '-t', '--force']
+    args = ['-x', '1e12', '-N', '2', '-k', '20', '--force']
 
     outfile = utils.get_temp_filename('out.kh')
     infile = utils.get_test_data('test-abund-read-2.fa')
@@ -157,7 +157,7 @@ def test_load_into_counting_fail():
 
 def test_load_into_counting_multifile():
     script = 'load-into-counting.py'
-    args = ['-x', '1e7', '-N', '2', '-k', '20', '-t']
+    args = ['-x', '1e7', '-N', '2', '-k', '20']
 
     outfile = utils.get_temp_filename('out.kh')
     infile = utils.get_test_data('test-abund-read-2.fa')
@@ -172,7 +172,7 @@ def test_load_into_counting_multifile():
 
 def test_load_into_counting_tsv():
     script = 'load-into-counting.py'
-    args = ['-x', '1e7', '-N', '2', '-k', '20', '-t', '-s', 'tsv']
+    args = ['-x', '1e7', '-N', '2', '-k', '20', '-s', 'tsv']
 
     outfile = utils.get_temp_filename('out.ct')
     tabfile = outfile + '.info.tsv'
@@ -195,7 +195,7 @@ def test_load_into_counting_tsv():
 
 def test_load_into_counting_json():
     script = 'load-into-counting.py'
-    args = ['-x', '1e7', '-N', '2', '-k', '20', '-t', '-s', 'json']
+    args = ['-x', '1e7', '-N', '2', '-k', '20', '-s', 'json']
 
     outfile = utils.get_temp_filename('out.ct')
     jsonfile = outfile + '.info.json'
@@ -313,6 +313,21 @@ def test_filter_abund_2():
     assert len(seqs) == 2, seqs
     assert 'GGTTGACGGGGCTCAGGG' in seqs
 
+
+def test_filter_abund_2_stdin():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
+    counting_ht = _make_counting(infile, K=17)
+
+    script = 'filter-abund.py'
+    args = ['-C', '1', counting_ht, '-']
+    (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status == 1
+    assert "Accepting input from stdin; output filename must be provided" \
+           in str(err)
+
 # make sure that FASTQ records are retained.
 
 
@@ -358,7 +373,7 @@ def test_filter_abund_4_fq_casava_18():
     outfile = infile + '.abundfilt'
     assert os.path.exists(outfile), outfile
 
-    seqs = set([r.name for r in screed.open(outfile, parse_description=False)])
+    seqs = set([r.name for r in screed.open(outfile)])
     assert 'pair:foo 1::N' in seqs, seqs
 
 
@@ -369,7 +384,7 @@ def test_filter_abund_1_singlefile():
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
 
     script = 'filter-abund-single.py'
-    args = ['-x', '1e7', '-N', '2', '-k', '17', '-t', infile]
+    args = ['-x', '1e7', '-N', '2', '-k', '17', infile]
     (status, out, err) = utils.runscript(script, args, in_dir)
 
     assert 'Total number of unique k-mers: 98' in err, err
@@ -390,7 +405,7 @@ def test_filter_abund_2_singlefile():
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
 
     script = 'filter-abund-single.py'
-    args = ['-x', '1e7', '-N', '2', '-k', '17', '-t', '--savetable',
+    args = ['-x', '1e7', '-N', '2', '-k', '17', '--savetable',
             tabfile, infile]
     (status, out, err) = utils.runscript(script, args, in_dir)
 
@@ -418,7 +433,7 @@ def test_filter_abund_2_singlefile_fq_casava_18():
     outfile = infile + '.abundfilt'
     assert os.path.exists(outfile), outfile
 
-    seqs = set([r.name for r in screed.open(outfile, parse_description=False)])
+    seqs = set([r.name for r in screed.open(outfile)])
     assert 'pair:foo 1::N' in seqs, seqs
 
 
@@ -508,7 +523,7 @@ def test_filter_abund_7_retain_Ns():
     assert os.path.exists(outfile), outfile
 
     # test for a sequence with an 'N' in it --
-    names = set([r.name for r in screed.open(outfile, parse_description=0)])
+    names = set([r.name for r in screed.open(outfile)])
     assert '895:1:37:17593:9954 1::FOO_withN' in names, names
 
     # check to see if that 'N' was properly changed to an 'A'
@@ -541,7 +556,7 @@ def test_filter_abund_single_8_retain_Ns():
     assert os.path.exists(outfile), outfile
 
     # test for a sequence with an 'N' in it --
-    names = set([r.name for r in screed.open(outfile, parse_description=0)])
+    names = set([r.name for r in screed.open(outfile)])
     assert '895:1:37:17593:9954 1::FOO_withN' in names, names
 
     # check to see if that 'N' was properly changed to an 'A'
@@ -618,7 +633,7 @@ def test_filter_stoptags_fq():
     assert 'GGTTGACGGGGCTCAGGG' in seqs, seqs
 
     # make sure that record names are carried through unparsed
-    names = [r.name for r in screed.open(outfile, parse_description=False)]
+    names = [r.name for r in screed.open(outfile)]
     names = set(names)
     assert 'seq 1::BAR' in names
 
@@ -636,15 +651,15 @@ def test_count_median():
 
     assert os.path.exists(outfile), outfile
 
-    data = [x.strip() for x in open(outfile)]
+    data = [x.strip() for x in open(outfile).readlines()[1:]]
     data = set(data)
     assert len(data) == 2, data
-    assert 'seq 1001 1001.0 0.0 18' in data
-    assert '895:1:37:17593:9954/1 1 103.803741455 303.702941895 114' in data
+    assert 'seq,1001,1001.0,0.0,18' in data, data
+    assert '895:1:37:17593:9954/1,1,103.803741455,303.702941895,114' in data
 
 
-def test_count_median_fq():
-    infile = utils.get_temp_filename('test.fa')
+def test_count_median_fq_csv():
+    infile = utils.get_temp_filename('test.fq')
     outfile = infile + '.counts'
 
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fq'), infile)
@@ -658,33 +673,28 @@ def test_count_median_fq():
 
     data = [x.strip() for x in open(outfile)]
     data = set(data)
-    assert len(data) == 2, data
-    assert 'seq 1001 1001.0 0.0 18' in data
-    assert '895:1:37:17593:9954 1 103.803741455 303.702941895 114' in data
+    assert len(data) == 4, data
+    assert 'name,median,average,stddev,seqlen' in data
+    assert 'seq,1001,1001.0,0.0,18' in data
 
+    # verify that sequence names remain unparsed
+    names = set([line.split(',')[0] for line in data])
+    assert '895:1:37:17593:9954 1::FOO' in names, names
 
-def test_count_median_fq_csv():
-    infile = utils.get_temp_filename('test.fa')
-    outfile = infile + '.counts'
+
+def test_count_median_fq_csv_stdout():
+    infile = utils.get_temp_filename('test.fq')
+    outfile = '-'
 
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fq'), infile)
     counting_ht = _make_counting(infile, K=8)
 
     script = 'count-median.py'
-    args = ['--csv', counting_ht, infile, outfile]
-    utils.runscript(script, args)
-
-    assert os.path.exists(outfile), outfile
-
-    data = [x.strip() for x in open(outfile)]
-    data = set(data)
-    assert len(data) == 4, data
-    assert 'name,median,average,stddev,seqlen' in data
-    assert 'seq,1001,1001.0,0.0,18' in data
+    args = [counting_ht, infile, outfile]
+    (status, out, err) = utils.runscript(script, args)
 
-    # verify that sequence names remain unparsed with '--csv'
-    names = set([line.split(',')[0] for line in data])
-    assert '895:1:37:17593:9954 1::FOO' in names, names
+    assert 'name,median,average,stddev,seqlen' in out
+    assert 'seq,1001,1001.0,0.0,18' in out
 
 
 def test_load_graph():
@@ -708,7 +718,7 @@ def test_load_graph():
 
     try:
         ht = khmer.load_hashbits(ht_file)
-    except IOError as err:
+    except OSError as err:
         assert 0, str(err)
     ht.load_tagset(tagset_file)
 
@@ -750,6 +760,13 @@ def test_oxli_build_graph():
     assert x == (1, 0), x
 
 
+def test_oxli_nocommand():
+    script = 'oxli'
+
+    (status, out, err) = utils.runscript(script, [])
+    assert status == 0
+
+
 def test_load_graph_no_tags():
     script = 'load-graph.py'
     args = ['-x', '1e7', '-N', '2', '-k', '20', '-n']
@@ -909,7 +926,7 @@ def test_load_graph_max_memory_usage_parameter():
 
     try:
         ht = khmer.load_hashbits(ht_file)
-    except IOError as err:
+    except OSError as err:
         assert 0, str(err)
 
     assert (sum(ht.hashsizes()) / 8.) < 2e7, ht.hashsizes()
@@ -1235,7 +1252,7 @@ def test_extract_partitions_header_whitespace():
     assert dist.strip() == '1 11960 11960 11960', dist.strip()
 
     parts = [r.name.split('\t')[1]
-             for r in screed.open(partfile, parse_description=False)]
+             for r in screed.open(partfile)]
     assert len(parts) == 13538, len(parts)
     parts = set(parts)
     assert len(parts) == 12602, len(parts)
@@ -1264,12 +1281,12 @@ def test_extract_partitions_fq():
     dist = open(distfile).readline()
     assert dist.strip() == '99 1 1 99'
 
-    screed_iter = screed.open(partfile, parse_description=False)
+    screed_iter = screed.open(partfile)
     names = [r.name.split('\t')[0] for r in screed_iter]
     assert '35 1::FOO' in names
     assert '46 1::FIZ' in names
 
-    screed_iter = screed.open(partfile, parse_description=False)
+    screed_iter = screed.open(partfile)
     parts = [r.name.split('\t')[1] for r in screed_iter]
 
     assert len(parts) == 99, len(parts)
@@ -1326,7 +1343,7 @@ def test_extract_partitions_no_output_groups():
     args = ['-n', 'extracted', partfile]
 
     # We expect a sys.exit -> we need the test to be tolerant
-    _, out, err = utils.runscript(script, args, in_dir, fail_ok=True)
+    status, out, err = utils.runscript(script, args, in_dir)
     assert "NOT outputting groups! Beware!" in err
     # Group files are created after output_groups is
     # checked. They should not exist in this scenario
@@ -1391,8 +1408,9 @@ def test_extract_partitions_no_groups():
     script = 'extract-partitions.py'
     args = ['extracted', empty_file]
 
-    _, _, err = utils.runscript(script, args, in_dir, fail_ok=True)
+    status, _, err = utils.runscript(script, args, in_dir, fail_ok=True)
     assert "ERROR: Input file", "is empty; Exiting." in err
+    assert status != 0
     # No group files should be created
     groupfile = os.path.join(in_dir, 'extracted.group0000.fa')
 
@@ -1414,16 +1432,6 @@ def test_abundance_dist():
 
     with open(outfile) as fp:
         line = fp.readline().strip()
-        assert line == '1 96 96 0.98', line
-        line = fp.readline().strip()
-        assert line == '1001 2 98 1.0', line
-
-    os.remove(outfile)
-    args = ['-z', '--csv', htfile, infile, outfile]
-    utils.runscript(script, args, in_dir)
-
-    with open(outfile) as fp:
-        line = fp.readline().strip()
         assert (line == 'abundance,count,cumulative,cumulative_fraction'), line
         line = fp.readline().strip()
         assert line == '1,96,96,0.98', line
@@ -1431,9 +1439,8 @@ def test_abundance_dist():
         assert line == '1001,2,98,1.0', line
 
 
-def test_abundance_dist_nobigcount():
+def test_abundance_dist_stdout():
     infile = utils.get_temp_filename('test.fa')
-    outfile = utils.get_temp_filename('test.dist')
     in_dir = os.path.dirname(infile)
 
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
@@ -1441,35 +1448,32 @@ def test_abundance_dist_nobigcount():
     htfile = _make_counting(infile, K=17)
 
     script = 'abundance-dist.py'
-    args = ['-b', '-z', htfile, infile, outfile]
-    utils.runscript(script, args, in_dir)
+    args = ['-z', htfile, infile, "-"]
+    (status, out, err) = utils.runscript(script, args, in_dir)
 
-    with open(outfile) as fp:
-        line = fp.readline().strip()
-        assert line == '1 96 96 0.98', line
-        line = fp.readline().strip()
-        assert line == '255 2 98 1.0', line
+    assert '1,96,96,0.98' in out, out
+    assert '1001,2,98,1.0' in out, out
 
 
-def test_abundance_dist_single():
+def test_abundance_dist_nobigcount():
     infile = utils.get_temp_filename('test.fa')
     outfile = utils.get_temp_filename('test.dist')
     in_dir = os.path.dirname(infile)
 
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
 
-    script = 'abundance-dist-single.py'
-    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '-t', infile,
-            outfile]
-    (status, out, err) = utils.runscript(script, args, in_dir)
+    htfile = _make_counting(infile, K=17)
 
-    assert 'Total number of unique k-mers: 98' in err, err
+    script = 'abundance-dist.py'
+    args = ['-b', '-z', htfile, infile, outfile]
+    utils.runscript(script, args, in_dir)
 
     with open(outfile) as fp:
+        line = fp.readline().strip()    # skip header
         line = fp.readline().strip()
-        assert line == '1 96 96 0.98', line
+        assert line == '1,96,96,0.98', line
         line = fp.readline().strip()
-        assert line == '1001 2 98 1.0', line
+        assert line == '255,2,98,1.0', line
 
 
 def test_abundance_dist_threaded():
@@ -1480,17 +1484,18 @@ def test_abundance_dist_threaded():
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
 
     script = 'abundance-dist-single.py'
-    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '-t', '--threads', '18',
+    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '--threads', '18',
             infile, outfile]
     (status, out, err) = utils.runscript(script, args, in_dir)
 
     assert 'Total number of unique k-mers: 98' in err, err
 
     with open(outfile) as fp:
+        line = fp.readline().strip()    # skip header
         line = fp.readline().strip()
-        assert line == '1 96 96 0.98', line
+        assert line == '1,96,96,0.98', line
         line = fp.readline().strip()
-        assert line == '1001 2 98 1.0', line
+        assert line == '1001,2,98,1.0', line
 
 
 def test_abundance_dist_single_csv():
@@ -1501,7 +1506,7 @@ def test_abundance_dist_single_csv():
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
 
     script = 'abundance-dist-single.py'
-    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '--csv', infile,
+    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', infile,
             outfile]
     (status, out, err) = utils.runscript(script, args, in_dir)
 
@@ -1526,10 +1531,11 @@ def test_abundance_dist_single_nobigcount():
     utils.runscript(script, args, in_dir)
 
     with open(outfile) as fp:
+        line = fp.readline().strip()    # skip header
         line = fp.readline().strip()
-        assert line == '1 96 96 0.98', line
+        assert line == '1,96,96,0.98', line
         line = fp.readline().strip()
-        assert line == '255 2 98 1.0', line
+        assert line == '255,2,98,1.0', line
 
 
 def test_abundance_dist_single_nosquash():
@@ -1540,14 +1546,15 @@ def test_abundance_dist_single_nosquash():
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
 
     script = 'abundance-dist-single.py'
-    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '-t', infile, outfile]
+    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', infile, outfile]
     utils.runscript(script, args, in_dir)
 
     with open(outfile) as fp:
+        line = fp.readline().strip()    # skip header
         line = fp.readline().strip()
-        assert line == '1 96 96 0.98', line
+        assert line == '1,96,96,0.98', line
         line = fp.readline().strip()
-        assert line == '1001 2 98 1.0', line
+        assert line == '1001,2,98,1.0', line
 
 
 def test_abundance_dist_single_savetable():
@@ -1559,15 +1566,16 @@ def test_abundance_dist_single_savetable():
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
 
     script = 'abundance-dist-single.py'
-    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '-t', '--savetable',
+    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '--savetable',
             tabfile, infile, outfile]
     utils.runscript(script, args, in_dir)
 
     with open(outfile) as fp:
+        line = fp.readline().strip()    # skip header
         line = fp.readline().strip()
-        assert line == '1 96 96 0.98', line
+        assert line == '1,96,96,0.98', line
         line = fp.readline().strip()
-        assert line == '1001 2 98 1.0', line
+        assert line == '1001,2,98,1.0', line
 
 
 def test_do_partition():
@@ -1620,7 +1628,7 @@ def test_do_partition_2_fq():
 
     partfile = os.path.join(in_dir, 'random-20-a.fq.part')
 
-    screed_iter = screed.open(partfile, parse_description=False)
+    screed_iter = screed.open(partfile)
     names = [r.name.split('\t')[0] for r in screed_iter]
     assert '35 1::FOO' in names
     assert '46 1::FIZ' in names
@@ -1720,19 +1728,20 @@ def test_interleave_reads_broken_fq_3():
     assert "ERROR: This doesn't look like paired data!" in err
 
 
-def test_interleave_reads_broken_fq_4():
+def test_interleave_reads_broken_fq_5():
     # test input files
-    infile1 = utils.get_test_data('paired-mixed-broken.fq')
+    infile1 = utils.get_test_data('paired-broken4.fq.1')
+    infile2 = utils.get_test_data('paired-broken4.fq.2')
 
     # actual output file
     outfile = utils.get_temp_filename('out.fq')
 
     script = 'interleave-reads.py'
-    args = [infile1, '-o', outfile]
+    args = [infile1, infile2, '-o', outfile]
 
     status, out, err = utils.runscript(script, args, fail_ok=True)
     assert status == 1
-    assert "ERROR: given only one filename, that doesn't contain _R1_" in err
+    assert "ERROR: This doesn't look like paired data!" in err
 
 
 def test_interleave_reads_2_fa():
@@ -1847,8 +1856,8 @@ def test_extract_paired_reads_2_fq():
     assert os.path.exists(outfile2), outfile2
 
     n = 0
-    for r, q in zip(screed.open(ex_outfile1, parse_description=False),
-                    screed.open(outfile1, parse_description=False)):
+    for r, q in zip(screed.open(ex_outfile1),
+                    screed.open(outfile1)):
         n += 1
         assert r.name == q.name, (r.name, q.name, n)
         assert r.sequence == q.sequence
@@ -1856,8 +1865,8 @@ def test_extract_paired_reads_2_fq():
     assert n > 0
 
     n = 0
-    for r, q in zip(screed.open(ex_outfile2, parse_description=False),
-                    screed.open(outfile2, parse_description=False)):
+    for r, q in zip(screed.open(ex_outfile2),
+                    screed.open(outfile2)):
         n += 1
         assert r.name == q.name
         assert r.sequence == q.sequence
@@ -2090,6 +2099,15 @@ def test_split_paired_reads_2_mixed_fq_require_pair():
     assert "is not part of a pair" in err
 
 
+def test_split_paired_reads_2_stdin_no_out():
+    script = 'split-paired-reads.py'
+    args = ['-']
+
+    status, out, err = utils.runscript(script, args, fail_ok=True)
+    assert status == 1
+    assert "Accepting input from stdin; output filenames must " in err
+
+
 def test_split_paired_reads_2_mixed_fq():
     # test input file
     infile = utils.get_temp_filename('test.fq')
@@ -2392,12 +2410,20 @@ def test_sample_reads_randomly_fq():
                   '850:2:1:2562:1308/1',
                   '850:2:1:3123:15968/2'}
 
-    seqs = set([r.name for r in screed.open(outfile,
-                                            parse_description=False)])
+    seqs = set([r.name for r in screed.open(outfile)])
     print(list(sorted(seqs)))
     assert seqs == answer
 
 
+def test_sample_reads_randomly_stdin_no_out():
+    script = 'sample-reads-randomly.py'
+    args = ['-']
+
+    (status, out, err) = utils.runscript(script, args, fail_ok=True)
+    assert status != 0
+    assert "Accepting input from stdin; output filename" in err, err
+
+
 def test_fastq_to_fasta():
 
     script = 'fastq-to-fasta.py'
@@ -2418,8 +2444,7 @@ def test_fastq_to_fasta():
     assert len(out.splitlines()) == 2, len(out.splitlines())
     assert "No lines dropped" in err
 
-    names = [r.name for r in screed.open(clean_outfile,
-                                         parse_description=False)]
+    names = [r.name for r in screed.open(clean_outfile)]
     assert '895:1:1:1246:14654 1:N:0:NNNNN' in names, names
 
     args = [n_infile, '-n', '-o', n_outfile]
@@ -2465,7 +2490,7 @@ def test_extract_long_sequences_fa():
     countlines = sum(1 for line in open(fa_outfile))
     assert countlines == 22, countlines
 
-    names = [r.name for r in screed.open(fa_outfile, parse_description=False)]
+    names = [r.name for r in screed.open(fa_outfile)]
     assert "895:1:37:17593:9954/1" in names
     assert "895:1:37:17593:9954/2" in names
 
@@ -2487,7 +2512,7 @@ def test_extract_long_sequences_fq():
     countlines = sum(1 for line in open(fq_outfile))
     assert countlines == 44, countlines
 
-    names = [r.name for r in screed.open(fq_outfile, parse_description=False)]
+    names = [r.name for r in screed.open(fq_outfile)]
     assert "895:1:37:17593:9954 1::foo" in names
     assert "895:1:37:17593:9954 2::foo" in names
 
@@ -2585,37 +2610,8 @@ def test_count_overlap_invalid_datafile():
     args = ['--ksize', '20', '--n_tables', '2', '--max-tablesize', '10000000',
             htfile + '.pt', htfile + '.pt', outfile]
     (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
-    if sys.version_info.major == 2:
-        assert "IOError" in err
-    else:
-        assert "OSError" in err
-
-
-def test_count_overlap():
-    seqfile1 = utils.get_temp_filename('test-overlap1.fa')
-    in_dir = os.path.dirname(seqfile1)
-    seqfile2 = utils.get_temp_filename('test-overlap2.fa', in_dir)
-    outfile = utils.get_temp_filename('overlap.out', in_dir)
-    curvefile = utils.get_temp_filename('overlap.out.curve', in_dir)
-    shutil.copy(utils.get_test_data('test-overlap1.fa'), seqfile1)
-    shutil.copy(utils.get_test_data('test-overlap2.fa'), seqfile2)
-    htfile = _make_graph(seqfile1, ksize=20)
-    script = 'count-overlap.py'
-    args = ['--ksize', '20', '--n_tables', '2', '--max-tablesize', '10000000',
-            htfile + '.pt', seqfile2, outfile]
-    (status, out, err) = utils.runscript(script, args, in_dir)
-    assert status == 0
-    assert os.path.exists(outfile), outfile
-    data = [x.strip() for x in open(outfile)]
-    data = set(data)
-    assert '# of unique k-mers in dataset2: 759020' in data, data
-    assert '# of overlap unique k-mers: 245547' in data
-    assert os.path.exists(curvefile), curvefile
-    data = [x.strip() for x in open(curvefile)]
-    data = set(data)
-    assert '178630 1134' in data, data
-    assert '496280 2904' in data
-    assert '752031 238558' in data
+    assert status != 0
+    assert "OSError" in err
 
 
 def test_count_overlap_csv():
@@ -2629,7 +2625,7 @@ def test_count_overlap_csv():
     htfile = _make_graph(seqfile1, ksize=20)
     script = 'count-overlap.py'
     args = ['--ksize', '20', '--n_tables', '2', '--max-tablesize',
-            '10000000', '--csv', htfile + '.pt', seqfile2, outfile]
+            '10000000', htfile + '.pt', seqfile2, outfile]
     (status, out, err) = utils.runscript(script, args, in_dir)
     assert status == 0
     assert os.path.exists(outfile), outfile
@@ -2680,29 +2676,30 @@ def execute_streaming_diginorm(ifilename):
     return in_dir + '/outfile'
 
 
-def execute_load_graph_streaming(filename):
+def _execute_load_graph_streaming(filename):
     '''Helper function for the matrix of streaming tests using screed via
     filter-abund-single, i.e. uncompressed fasta, gzip fasta, bz2 fasta,
     uncompressed fastq, etc.
     This is not directly executed but is run by the tests themselves
     '''
 
-    script = 'load-graph.py'
-    args = '-x 1e7 -N 2 -k 20 out -'
-
+    scripts = utils.scriptpath()
     infile = utils.get_temp_filename('temp')
     in_dir = os.path.dirname(infile)
     shutil.copyfile(utils.get_test_data(filename), infile)
-    (status, out, err) = utils.runscriptredirect(script, args, infile, in_dir)
+
+    args = '-x 1e7 -N 2 -k 20 out -'
+
+    cmd = 'cat {infile} | {scripts}/load-graph.py {args}'.format(
+        infile=infile, scripts=scripts, args=args)
+
+    (status, out, err) = utils.run_shell_cmd(cmd, in_directory=in_dir)
 
     if status != 0:
-        for line in out:
-            print(out)
-        for line in err:
-            print(err)
+        print(out)
+        print(err)
         assert status == 0, status
-    err.seek(0)
-    err = err.read()
+
     assert 'Total number of unique k-mers: 3960' in err, err
 
     ht_file = os.path.join(in_dir, 'out.pt')
@@ -2778,34 +2775,34 @@ def test_screed_streaming_gzipfa():
 
 def test_read_parser_streaming_ufa():
     # uncompressed FASTA
-    execute_load_graph_streaming(utils.get_test_data('random-20-a.fa'))
+    _execute_load_graph_streaming(utils.get_test_data('random-20-a.fa'))
 
 
 def test_read_parser_streaming_ufq():
     # uncompressed FASTQ
-    execute_load_graph_streaming(utils.get_test_data('random-20-a.fq'))
+    _execute_load_graph_streaming(utils.get_test_data('random-20-a.fq'))
 
 
 @attr('known_failing')
 def test_read_parser_streaming_bzfq():
     # bzip compressed FASTQ
-    execute_load_graph_streaming(utils.get_test_data('random-20-a.fq.bz2'))
+    _execute_load_graph_streaming(utils.get_test_data('random-20-a.fq.bz2'))
 
 
 def test_read_parser_streaming_gzfq():
     # gzip compressed FASTQ
-    execute_load_graph_streaming(utils.get_test_data('random-20-a.fq.gz'))
+    _execute_load_graph_streaming(utils.get_test_data('random-20-a.fq.gz'))
 
 
 @attr('known_failing')
 def test_read_parser_streaming_bzfa():
     # bzip compressed FASTA
-    execute_load_graph_streaming(utils.get_test_data('random-20-a.fa.bz2'))
+    _execute_load_graph_streaming(utils.get_test_data('random-20-a.fa.bz2'))
 
 
 def test_read_parser_streaming_gzfa():
     # gzip compressed FASTA
-    execute_load_graph_streaming(utils.get_test_data('random-20-a.fa.gz'))
+    _execute_load_graph_streaming(utils.get_test_data('random-20-a.fa.gz'))
 
 
 def test_readstats():
@@ -2891,12 +2888,20 @@ def test_trim_low_abund_1_duplicate_filename_err():
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
 
     args = ["-k", "17", "-x", "1e7", "-N", "2", '-C', '1', infile, infile]
-    try:
-        utils.runscript('trim-low-abund.py', args, in_dir)
-        raise Exception("should not reach this")
-    except AssertionError:
-        # an error should be raised by passing 'infile' twice.
-        pass
+    (status, out, err) = utils.runscript('trim-low-abund.py', args, in_dir,
+                                         fail_ok=True)
+    assert status == 1
+    assert "Error: Cannot input the same filename multiple times." in str(err)
+
+
+def test_trim_low_abund_1_stdin_err():
+    args = ["-"]
+
+    (status, out, err) = utils.runscript('trim-low-abund.py', args,
+                                         fail_ok=True)
+    assert status == 1
+    assert "Accepting input from stdin; output filename must be provided" \
+           in str(err)
 
 
 def test_trim_low_abund_2():
@@ -3037,7 +3042,7 @@ def test_trim_low_abund_keep_paired_casava18():
     outfile = infile + '.abundtrim'
     assert os.path.exists(outfile), outfile
 
-    seqs = [r.name for r in screed.open(outfile, parse_description=False)]
+    seqs = [r.name for r in screed.open(outfile)]
     assert seqs[-2:] == ['pair:foo 1::N', 'pair:foo 2::N'], seqs
 
 
@@ -3233,3 +3238,74 @@ def test_roundtrip_commented_format():
     r = open(infile).read()
     r2 = open(outfile).read()
     assert r == r2, (r, r2)
+
+
+def test_unique_kmers_defaults():
+    infile = utils.get_temp_filename('random-20-a.fa')
+    shutil.copyfile(utils.get_test_data('random-20-a.fa'), infile)
+
+    args = ['-k', '20', '-e', '0.01', infile]
+
+    _, out, err = utils.runscript('unique-kmers.py', args,
+                                  os.path.dirname(infile))
+
+    err = err.splitlines()
+    assert ('Estimated number of unique 20-mers in {0}: 3950'.format(infile)
+            in err)
+    assert 'Total estimated number of unique 20-mers: 3950' in err
+
+
+def test_unique_kmers_report_fp():
+    infile = utils.get_temp_filename('random-20-a.fa')
+    shutil.copyfile(utils.get_test_data('random-20-a.fa'), infile)
+    outfile = utils.get_temp_filename('report.unique')
+
+    args = ['-k', '20', '-e', '0.01', '-R', outfile, infile]
+
+    _, out, err = utils.runscript('unique-kmers.py', args,
+                                  os.path.dirname(infile))
+
+    err = err.splitlines()
+    assert ('Estimated number of unique 20-mers in {0}: 3950'.format(infile)
+            in err)
+    assert 'Total estimated number of unique 20-mers: 3950' in err
+
+    with open(outfile, 'r') as report_fp:
+        outf = report_fp.read().splitlines()
+        assert '3950 20 (total)' in outf
+        assert '3950 20 total' in outf
+
+
+def test_unique_kmers_diagnostics():
+    infile = utils.get_temp_filename('random-20-a.fa')
+    shutil.copyfile(utils.get_test_data('random-20-a.fa'), infile)
+
+    args = ['-k', '20', '-e', '0.01', '--diagnostics', infile]
+
+    _, out, err = utils.runscript('unique-kmers.py', args,
+                                  os.path.dirname(infile))
+
+    out = out.splitlines()
+    assert ('expected_fp\tnumber_hashtable(Z)\t'
+            'size_hashtable(H)\texpected_memory_usage' in err)
+
+
+def test_unique_kmers_multiple_inputs():
+    infiles = []
+    for fname in ('random-20-a.fa', 'paired-mixed.fa'):
+        infile = utils.get_temp_filename(fname)
+        shutil.copyfile(utils.get_test_data(fname), infile)
+        infiles.append(infile)
+
+    args = ['-k', '20', '-e', '0.01']
+    args += infiles
+
+    _, out, err = utils.runscript('unique-kmers.py', args,
+                                  os.path.dirname(infile))
+
+    err = err.splitlines()
+    assert ('Estimated number of unique 20-mers in {0}: 3950'
+            .format(infiles[0]) in err)
+    assert ('Estimated number of unique 20-mers in {0}: 232'.format(infiles[1])
+            in err)
+    assert 'Total estimated number of unique 20-mers: 4170' in err
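
The new tests/test_streaming_io.py below drives whole shell pipelines ('-' as
stdin/stdout) through a run_shell_cmd() helper instead of calling the scripts
in-process, which is why those tests do not count toward coverage. A minimal
sketch of such a helper built on the standard library alone (the real one in
khmer_tst_utils may differ):

    import subprocess

    def run_shell_cmd(cmd, fail_ok=False, in_directory=None):
        # Run cmd through the shell and capture both output streams.
        proc = subprocess.Popen(cmd, shell=True, cwd=in_directory,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = proc.communicate()
        out = out.decode('utf-8')
        err = err.decode('utf-8')
        if proc.returncode != 0 and not fail_ok:
            print(out)
            print(err)
            raise AssertionError("shell command failed: %s" % cmd)
        return proc.returncode, out, err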
diff --git a/tests/test_streaming_io.py b/tests/test_streaming_io.py
new file mode 100644
index 0000000..6ba7ef9
--- /dev/null
+++ b/tests/test_streaming_io.py
@@ -0,0 +1,451 @@
+#
+# This file is part of khmer, https://github.com/dib-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
+# Contact: khmer-project at idyll.org
+#
+
+# important note -- these tests do not contribute to code coverage, because
+# of the use of subprocess to execute.  Most script tests should go into
+# test_scripts.py for this reason.
+
+from __future__ import print_function
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import khmer
+import screed
+from . import khmer_tst_utils as utils
+from .khmer_tst_utils import scriptpath, run_shell_cmd
+from .test_scripts import _make_counting
+import os.path
+import difflib
+
+
+def files_are_equal(a, b):
+    al = open(a).readlines()
+    bl = open(b).readlines()
+
+    return al == bl
+
+
+def diff_files(a, b):
+    al = open(a).readlines()
+    bl = open(b).readlines()
+
+    results = "\n".join(difflib.context_diff(al, bl, fromfile=a, tofile=b))
+    return results
+
+
+def test_interleave_split_1():
+    in1 = utils.get_test_data('paired.fq.1')
+    in2 = utils.get_test_data('paired.fq.2')
+
+    out1 = utils.get_temp_filename('a.fa')
+    out2 = utils.get_temp_filename('b.fa')
+
+    cmd = """
+       {scripts}/interleave-reads.py {in1} {in2} -o -             |
+       {scripts}/split-paired-reads.py -1 {out1} -2 {out2} -
+    """
+
+    cmd = cmd.format(scripts=scriptpath(),
+                     in1=in1, in2=in2,
+                     out1=out1, out2=out2)
+
+    run_shell_cmd(cmd)
+
+    assert files_are_equal(in1, out1), diff_files(in1, out1)
+    assert files_are_equal(in2, out2), diff_files(in2, out2)
+
+
+def test_interleave_split_2_fail():
+    in1 = utils.get_test_data('paired.fq.1')
+    in2 = utils.get_test_data('paired.fq.2')
+
+    out1 = utils.get_temp_filename('a.fa')
+    out2 = utils.get_temp_filename('b.fa')
+
+    cmd = """
+       {scripts}/interleave-reads.py {in1} {in2} -o -             |
+       {scripts}/split-paired-reads.py -
+    """
+
+    cmd = cmd.format(scripts=scriptpath(),
+                     in1=in1, in2=in2,
+                     out1=out1, out2=out2)
+
+    (status, out, err) = run_shell_cmd(cmd, fail_ok=True)
+    assert status != 0
+    assert "Accepting input from stdin; output filenames must be provided." \
+           in err, err
+
+
+def test_extract_paired_pe():
+    in1 = utils.get_test_data('paired-mixed.fq')
+    out_test = utils.get_test_data('paired-mixed.fq.pe')
+    out1 = utils.get_temp_filename('a.fq')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/extract-paired-reads.py - -p - -s /dev/null > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    run_shell_cmd(cmd)
+
+    assert files_are_equal(out1, out_test), diff_files(out1, out_test)
+
+
+def test_extract_paired_se():
+    in1 = utils.get_test_data('paired-mixed.fq')
+    out_test = utils.get_test_data('paired-mixed.fq.se')
+    out1 = utils.get_temp_filename('a.fq')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/extract-paired-reads.py - -p /dev/null -s - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    run_shell_cmd(cmd)
+
+    assert files_are_equal(out1, out_test), diff_files(out1, out_test)
+
+
+def test_extract_paired_se_fail():
+    in1 = utils.get_test_data('paired-mixed.fq')
+    out_test = utils.get_test_data('paired-mixed.fq.se')
+    out1 = utils.get_temp_filename('a.fq')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/extract-paired-reads.py -p /dev/null - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    (status, out, err) = run_shell_cmd(cmd, fail_ok=True)
+    assert status != 0
+    assert "Accepting input from stdin; output filenames must be provided." \
+           in err, err
+
+
+def test_norm_by_median_1():
+    in1 = utils.get_test_data('paired-mixed.fq')
+    out_test = utils.get_test_data('paired-mixed.fq.pe')
+    out1 = utils.get_temp_filename('a.fq')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/extract-paired-reads.py - -p - -s /dev/null |
+       {scripts}/normalize-by-median.py - -o - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    run_shell_cmd(cmd)
+
+    assert files_are_equal(out1, out_test), diff_files(out1, out_test)
+
+
+def test_norm_by_median_2_fail():
+    in1 = utils.get_test_data('paired-mixed.fq')
+    out_test = utils.get_test_data('paired-mixed.fq.pe')
+    out1 = utils.get_temp_filename('a.fq')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/extract-paired-reads.py - -p - -s /dev/null |
+       {scripts}/normalize-by-median.py -p - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    (status, out, err) = run_shell_cmd(cmd, fail_ok=True)
+    assert status != 0
+    assert "Accepting input from stdin; output filename must be provided with"\
+           in err, err
+
+
+def test_sample_reads_randomly_1():
+    in1 = utils.get_test_data('paired-mixed.fq')
+    out1 = utils.get_temp_filename('a.fq')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/sample-reads-randomly.py - -o - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    run_shell_cmd(cmd)
+
+    assert files_are_equal(in1, out1), diff_files(in1, out1)
+
+
+def test_sample_reads_randomly_2_fail():
+    in1 = utils.get_test_data('paired-mixed.fq')
+    out1 = utils.get_temp_filename('a.fq')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/sample-reads-randomly.py - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    (status, out, err) = run_shell_cmd(cmd, fail_ok=True)
+    assert status != 0
+    assert "Accepting input from stdin; output filename must be provided with"\
+           in err, err
+
+
+def test_extract_long_sequences_1():
+    in1 = utils.get_test_data('paired-mixed.fa')
+    out1 = utils.get_temp_filename('a.fa')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/extract-long-sequences.py - -l 10 > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    run_shell_cmd(cmd)
+
+    countlines = sum(1 for line in open(out1))
+    assert countlines == 22, countlines
+
+
+def test_fastq_to_fasta_1():
+    in1 = utils.get_test_data('test-fastq-reads.fq')
+    out1 = utils.get_temp_filename('clean.fa')
+    out_test = utils.get_test_data('test-fastq-reads.fa')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/fastq-to-fasta.py - -o - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    run_shell_cmd(cmd)
+    assert files_are_equal(out1, out_test), diff_files(out1, out_test)
+
+
+def test_load_into_counting_1():
+    in1 = utils.get_test_data('test-abund-read-2.fa')
+    out1 = utils.get_temp_filename('out.ct')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/load-into-counting.py -x 1e3 -N 2 -k 20 {out1} - \
+       2> /dev/null
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+    print(cmd)
+
+    (status, out, err) = run_shell_cmd(cmd)
+    assert os.path.exists(out1)
+    khmer.load_counting_hash(out1)
+
+
+def test_load_graph_1():
+    in1 = utils.get_test_data('test-abund-read-2.fa')
+    out1 = utils.get_temp_filename('out.ct')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/load-graph.py -x 1e3 -N 2 -k 20 {out1} - \
+       2> /dev/null
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+    print(cmd)
+
+    (status, out, err) = run_shell_cmd(cmd)
+    assert os.path.exists(out1 + '.pt')
+    khmer.load_hashbits(out1 + '.pt')
+
+
+def test_filter_abund_1():
+    in1 = utils.get_test_data('test-abund-read-2.fa')
+    out1 = utils.get_temp_filename('out.abundfilt')
+
+    countgraph = _make_counting(in1, K=17)
+
+    cmd = """
+       cat {in1} |
+       {scripts}/filter-abund.py {countgraph} - -o - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1,
+                     countgraph=countgraph)
+
+    run_shell_cmd(cmd)
+
+    assert os.path.exists(out1)
+    seqs = set([r.sequence for r in screed.open(out1)])
+
+    assert len(seqs) == 1, seqs
+    assert 'GGTTGACGGGGCTCAGGG' in seqs
+
+
+def test_filter_abund_2_fail():
+    in1 = utils.get_test_data('test-abund-read-2.fa')
+    out1 = utils.get_temp_filename('out.abundfilt')
+
+    countgraph = _make_counting(in1, K=17)
+
+    cmd = """
+       cat {in1} |
+       {scripts}/filter-abund.py {countgraph} - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1,
+                     countgraph=countgraph)
+
+    (status, out, err) = run_shell_cmd(cmd, fail_ok=True)
+    assert status != 0
+    assert "Accepting input from stdin; output filename must be provided with"\
+           in err, err
+
+
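+# abundance-dist.py accepts '-' for both the read input and the histogram
+# output; two rows of the resulting CSV are spot-checked.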
+def test_abundance_dist_1():
+    in1 = utils.get_test_data('test-abund-read-2.fa')
+    out1 = utils.get_temp_filename('out.dist')
+
+    countgraph = _make_counting(in1, K=17)
+    assert os.path.exists(countgraph)
+
+    cmd = """
+       cat {in1} |
+       {scripts}/abundance-dist.py -z {countgraph} - - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1,
+                     countgraph=countgraph)
+
+    run_shell_cmd(cmd)
+
+    assert os.path.exists(out1)
+    with open(out1) as fp:
+        fp.readline()  # discard the first line (header)
+        line = fp.readline().strip()
+        assert line == '1,96,96,0.98', line
+        line = fp.readline().strip()
+        assert line == '1001,2,98,1.0', line
+
+
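+# trim-low-abund.py builds its counting table on the fly, so streaming
+# requires only the '-o -' redirection.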
+def test_trim_low_abund_1():
+    in1 = utils.get_test_data('test-abund-read-2.fa')
+    out1 = utils.get_temp_filename('out.abundtrim')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/trim-low-abund.py -k 17 -x 1e7 -N 2 - -o - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    run_shell_cmd(cmd)
+
+    assert os.path.exists(out1)
+    seqs = {r.sequence for r in screed.open(out1)}
+
+    assert len(seqs) == 1, seqs
+    assert 'GGTTGACGGGGCTCAGGG' in seqs
+
+
+def test_trim_low_abund_2_fail():
+    in1 = utils.get_test_data('test-abund-read-2.fa')
+    out1 = utils.get_temp_filename('out.abundtrim')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/trim-low-abund.py -k 17 -x 1e7 -N 2 - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    (status, out, err) = run_shell_cmd(cmd, fail_ok=True)
+    assert status != 0
+    assert "Accepting input from stdin; output filename must be provided with"\
+           in err, err
+
+
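+# count-median.py over a pipe: the output should hold three distinct
+# lines, a CSV header plus one statistics row per input read.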
+def test_count_median_1():
+    in1 = utils.get_test_data('test-abund-read-2.fa')
+    out1 = utils.get_temp_filename('out.counts')
+
+    countgraph = _make_counting(in1, K=8)
+    cmd = """
+       cat {in1} |
+       {scripts}/count-median.py {countgraph} - - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), countgraph=countgraph,
+                     in1=in1, out1=out1)
+
+    run_shell_cmd(cmd)
+
+    assert os.path.exists(out1), out1
+    data = {x.strip() for x in open(out1)}
+    assert len(data) == 3, data
+    assert 'seq,1001,1001.0,0.0,18' in data
+    assert '895:1:37:17593:9954/1,1,103.803741455,303.702941895,114' in data
+
+
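+# readstats.py emits a summary report rather than reads, so a plain
+# stdout redirection suffices without '-o'.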
+def test_readstats_1():
+    in1 = utils.get_test_data('test-abund-read-2.fa')
+    out1 = utils.get_temp_filename('out.stats')
+
+    cmd = """
+       cat {in1} |
+       {scripts}/readstats.py --csv - > {out1}
+    """
+
+    cmd = cmd.format(scripts=scriptpath(), in1=in1, out1=out1)
+
+    run_shell_cmd(cmd)
+    data = open(out1).read()
+    assert '18114,1001,18.1,-' in data, data
+
+
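+# --stream-out echoes the input sequences to stdout while the cardinality
+# estimate goes to stderr, so unique-kmers.py can sit inside a pipeline.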
+def test_unique_kmers_stream_out_fasta():
+    infile = utils.get_test_data('random-20-a.fa')
+
+    cmd = "{scripts}/unique-kmers.py -k 20 -e 0.01 --stream-out {infile}"
+    cmd = cmd.format(scripts=scriptpath(), infile=infile)
+
+    (status, out, err) = run_shell_cmd(cmd)
+
+    expected = ('Estimated number of unique 20-mers in {infile}: 3950'
+                .format(infile=infile))
+    assert expected in err
+    assert 'Total estimated number of unique 20-mers: 3950' in err
+
+    assert '>45' in out
+    assert "ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA" in out
+
+
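+# FASTQ records containing N, and their quality lines, must pass through
+# the stream unaltered.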
+def test_unique_kmers_stream_out_fastq_with_N():
+    infile = utils.get_test_data('test-filter-abund-Ns.fq')
+
+    cmd = "{scripts}/unique-kmers.py -k 20 -e 0.01 --stream-out {infile}"
+    cmd = cmd.format(scripts=scriptpath(), infile=infile)
+
+    (status, out, err) = run_shell_cmd(cmd)
+
+    expected = ('Estimated number of unique 20-mers in {infile}: 94'
+                .format(infile=infile))
+    assert expected in err
+    assert 'Total estimated number of unique 20-mers: 94' in err
+
+    assert '@895:1:37:17593:9954 1::FOO_withN' in out
+    assert "GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGNGACAGCAGCCGCAGCTGTCGTCA" in out
+    assert "##########################################################" in out
diff --git a/tests/test_subset_graph.py b/tests/test_subset_graph.py
index b5a4209..44f7569 100644
--- a/tests/test_subset_graph.py
+++ b/tests/test_subset_graph.py
@@ -6,7 +6,7 @@ from __future__ import absolute_import
 # the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
-# pylint: disable=missing-docstring
+# pylint: disable=missing-docstring,invalid-name,unused-variable,no-member
 import khmer
 import screed
 
@@ -266,7 +266,7 @@ class Test_SaveLoadPmap(object):
             try:
                 a = ht.load_subset_partitionmap(outfile3)
                 assert 0, "this should not pass"
-            except IOError as err:
+            except OSError as err:
                 print(str(err), i)
 
     def test_save_load_merge_2(self):
@@ -306,7 +306,7 @@ class Test_SaveLoadPmap(object):
         try:
             a = ht.load_subset_partitionmap('this does not exist')
             assert 0, "this should not succeed"
-        except IOError as e:
+        except OSError as e:
             print(str(e))
 
     def test_save_merge_from_disk(self):
@@ -385,7 +385,7 @@ class Test_SaveLoadPmap(object):
         try:
             ht.merge_subset_from_disk(outfile1)
             assert 0, "this should fail"
-        except IOError as e:
+        except OSError as e:
             print(str(e))
 
     def test_merge_from_disk_file_bad_type(self):
@@ -395,7 +395,7 @@ class Test_SaveLoadPmap(object):
         try:
             ht.merge_subset_from_disk(infile)
             assert 0, "this should fail"
-        except IOError as e:
+        except OSError as e:
             print(str(e))
 
     def test_merge_from_disk_file_version(self):
@@ -405,7 +405,7 @@ class Test_SaveLoadPmap(object):
         try:
             ht.merge_subset_from_disk(infile)
             assert 0, "this should fail"
-        except IOError as e:
+        except OSError as e:
             print(str(e))
 
     def test_save_merge_from_disk_ksize(self):
@@ -428,7 +428,7 @@ class Test_SaveLoadPmap(object):
         try:
             ht.merge_subset_from_disk(outfile1)
             assert 0, "this should fail"
-        except IOError as e:
+        except OSError as e:
             print(str(e))
 
 
@@ -499,7 +499,7 @@ def test_save_load_on_graph_truncate():
         try:
             a = ht.load_partitionmap(outfile3)
             assert 0, "this should not pass"
-        except IOError as err:
+        except OSError as err:
             print(str(err), i)
 
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/khmer.git



More information about the debian-med-commit mailing list