[med-svn] [python-screed] 01/03: Imported Upstream version 0.9

Sun Jun 14 08:15:14 UTC 2015

This is an automated email from the git hooks/post-receive script.

misterc-guest pushed a commit to branch master
in repository python-screed.

commit 3729975d8e28f8723f14f1510c74c6d49a5bc8fb
Author: Michael R. Crusoe <michael.crusoe at gmail.com>
Date:   Thu Jun 11 16:37:27 2015 -0700

    Imported Upstream version 0.9
---
 .travis.yml                                        |  12 +
 ChangeLog                                          |  80 +++
 MANIFEST.in                                        |   5 +-
 Makefile                                           |  10 +-
 README.md                                          |   4 +-
 bigtests/__init__.py                               |   4 +-
 doc/{CODE_OF_CONDUCT.txt => CODE_OF_CONDUCT.rst}   |   0
 doc/{LICENSE.txt => LICENSE.rst}                   |   0
 doc/conf.py                                        |   4 +-
 doc/dev/CODE_OF_CONDUCT.rst                        |   1 -
 ...review.txt => coding-guidelines-and-review.rst} |   0
 doc/dev/{index.txt => index.rst}                   |   0
 ...release-checklist.txt => release-checklist.rst} |  57 +-
 doc/{example.txt => example.rst}                   |   0
 doc/{index.txt => index.rst}                       |   2 +-
 .../{RELEASE-0.5.txt => RELEASE-0.5.rst}           |   0
 doc/release-notes/RELEASE-0.8.md                   |  20 +-
 doc/release-notes/RELEASE-0.8.rst                  |  70 ++
 doc/release-notes/RELEASE-0.9.md                   |  41 ++
 doc/release-notes/RELEASE-0.9.rst                  |  72 ++
 doc/release-notes/{index.txt => index.rst}         |   1 +
 doc/screed.html                                    | 743 ---------------------
 doc/{screed.txt => screed.rst}                     |  20 +-
 doc/user/known-issues.rst                          |  18 +
 doc/user/known-issues.txt                          |  26 -
 screed/__init__.py                                 |  24 +-
 screed/_version.py                                 | 102 ++-
 screed/conversion.py                               |  18 +-
 screed/createscreed.py                             |   3 +-
 screed/dna.py                                      |  16 +-
 screed/dump_to_fasta.py                            |   5 +-
 screed/dump_to_fastq.py                            |   5 +-
 screed/fadbm.py                                    |   2 +-
 screed/fasta.py                                    |  17 +-
 screed/fastq.py                                    |  26 +-
 screed/fqdbm.py                                    |   8 +-
 screed/hava.py                                     |  19 +-
 screed/openscreed.py                               | 181 +++--
 screed/pygr_api.py                                 |   6 +-
 screed/screedRecord.py                             |  34 +-
 screed/seqparse.py                                 |  22 +-
 screed/tests/havaGen.py                            |   3 +-
 screed/tests/screed_tst_utils.py                   |  49 ++
 screed/tests/{ => test-data}/empty.fa              |   0
 screed/tests/{ => test-data}/test-whitespace.fa    |   0
 screed/tests/{ => test-data}/test.fa               |   0
 screed/tests/{ => test-data}/test.fa.bz2           | Bin
 screed/tests/{ => test-data}/test.fa.gz            | Bin
 screed/tests/{ => test-data}/test.fa.zip           | Bin
 screed/tests/{ => test-data}/test.fastq            |   0
 screed/tests/{ => test-data}/test.fastq.bz2        | Bin
 screed/tests/{ => test-data}/test.fastq.gz         | Bin
 screed/tests/{ => test-data}/test.hava             |   0
 screed/tests/test_convert.py                       |  14 +-
 screed/tests/test_dictionary.py                    |  23 +-
 screed/tests/test_fasta.py                         |  42 +-
 screed/tests/test_fasta_recover.py                 |  13 +-
 screed/tests/test_fastq.py                         |  50 +-
 screed/tests/test_fastq_recover.py                 |  13 +-
 screed/tests/test_hava_methods.py                  |   6 +-
 screed/tests/test_open.py                          |  54 +-
 screed/tests/test_open_cm.py                       |  96 +++
 screed/tests/test_pygr_api.py                      |   4 +-
 screed/tests/test_shell.py                         |  16 +-
 screed/tests/test_streaming.py                     |  32 +-
 screed/utils.py                                    |   7 +
 setup.py                                           |   5 +-
 tox.ini                                            |   9 +-
 versioneer.py                                      | 295 +++++---
 69 files changed, 1268 insertions(+), 1141 deletions(-)

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..bde74d3
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,12 @@
+language: python
+python: 2.7
+
+env:
+  - TOX_ENV=py27
+  - TOX_ENV=py33
+  - TOX_ENV=py34
+
+install:
+  - pip install tox
+
+script: tox -e $TOX_ENV
diff --git a/ChangeLog b/ChangeLog
index 4575486..12f1302 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,83 @@
+2015-06-10  Michael R. Crusoe  <crusoe at ucdavis.edu>
+
+   * doc/user/known-issues.rst: removed two fixed issues
+   * doc/screed.rst: updated install & test instructions
+   * doc/screed.html: removed un-needed file
+
+2015-06-05  Titus Brown  <titus at idyll.org>
+
+   * screed/{fasta.py,fastq.py,seqparse.py}: Set parse_description default
+   to False.
+   * screed/tests/test*.py: updated tests appropriately.
+
+2015-06-05  Luiz Irber  <screed at luizirber.org>
+
+   * screed/screedRecord.py: Simplify implementation of record slicing.
+   * screed/tests/test_fast{a,q}.py: Loop over distinct slices during test.
+
+2015-06-05  Michael Wright  <wrigh517 at msu.edu>
+
+   * screed/screedRecord.py: Allow slicing of screed records to fix issue #768
+
+2015-06-05  en zyme  <en_zyme at outlook.com>
+
+   * screed/tests/fastq.py: check for empty line in two places
+
+2015-05-29  Luiz Irber  <screed at luizirber.org>
+
+   * screed/openscreed.py: Add missing "close" method to context manager.
+
+2015-05-27  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * MANIFEST.in: ship the recently relocated test data, fixed reference to
+   renamed LICENSE file
+   * doc/dev/CODE_OF_CONDUCT.rst: drop unused symlink
+   * doc/dev/release-checklist.rst: fix line wrap
+
+2015-05-12  Luiz Irber  <screed at luizirber.org>
+
+   * screed/openscreed.py: Implement open as a context manager, keep backward
+   compatibility.
+   * screed/tests/test_open_cm.py: Add same tests as test_open.py, but using
+   a context manager to make sure file is closed after being used.
+
+2015-04-15  Thomas Fenzl  <thomas.fenzl at gmx.net>
+
+   * screed/tests/screed_tst_utils.py: removed unnecessary import
+   * screed/tests/test_streaming.py: changed execution order to handle
+   missing import files better
+   * screed/openscreed.py: pylint-ified
+
+2015-04-15  Thomas Fenzl  <thomas.fenzl at gmx.net>
+
+   * Makefile: added setup.py develop to test goal
+   * screed/openscreed.py,screed/tests/test_open.py: added handling of '-'
+
+2015-04-09  Sarah Guermond  <sarah.guermond at gmail.com>
+
+   * screed/screedRecord.py: renamed _screed_record_dict() to Record()
+   * screed/__init__.py: added import for Record
+   * screed/fasta.py: changed _screed_record_dict() to Record()
+   * screed/fastq.py: changed _screed_record_dict() to Record()
+
+2015-04-09  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * doc/dev/release-checklist.txt: added "making final release" notes
+   * Makefile: copied over @mr-c's md-to-rst release notes conversion target
+   * doc/dev/release-notes/RELEASE-0.8.txt: added rst version of release notes 
+   for sphinx
+   * doc/dev/release-notes/index.txt: added rst version of 0.8 release notes to
+   toctree
+
+2015-04-07  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * screed/tests/test_{dictionary, fasta, fasta_recover, fastq, fastq_recover,
+   hava_methods, shell}.py: changed tests to use tempdirs
+   * screed/tests/screed_tst_utils.py: copied in khmer test utils
+   * screed/tests/{empty.fa, test-whitespace.fa, test.fa, test.fa.bz2,
+   test.fa.gz, test.fa.zip, test.fastq, test.fastq.bz2, test.hava}: moved test
+   data to screed/tests/test-data/ directory
+
 2015-04-04  Jacob Fenton  <bocajnotnef at gmail.com>
 
    * doc/dev/release{.txt,-checklist.txt}: renamed/restored release.txt to
diff --git a/MANIFEST.in b/MANIFEST.in
index 77721a1..8367566 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
 include ChangeLog Makefile README.md
 include versioneer.py MANIFEST.in Doxyfile.in
-include TODO doc/LICENSE.txt
-include screed/_version.py screed/tests/empty.fa
+include TODO doc/LICENSE.rst
+include screed/_version.py
+graft screed/tests/test-data
diff --git a/Makefile b/Makefile
index c3294c1..7f5ee90 100644
--- a/Makefile
+++ b/Makefile
@@ -14,9 +14,7 @@ all:
 	./setup.py build
 
 install-dependencies:
-	pip2 install --user --upgrade $(DEVPKGS) || pip2 install --upgrade \
-		$(DEVPKGS) || pip install --user --upgrade $(DEVPKGS) || pip \
-		install --upgrade $(DEVPKGS)
+	pip install --upgrade $(DEVPKGS) || pip2 install --upgrade $(DEVPKGS)
 
 install: FORCE
 	./setup.py build install
@@ -92,6 +90,11 @@ doxygen: doc/doxygen/html/index.html
 
 doc: build/sphinx/html/index.html
 
+convert-release-notes:
+		for file in doc/release-notes/*.md; do \
+				pandoc --from=markdown --to=rst $${file} > $${file%%.md}.rst; \
+				done
+
 build/sphinx/html/index.html: $(SOURCES) $(wildcard doc/*.txt) doc/conf.py all
 		./setup.py build_sphinx --fresh-env
 		@echo ''
@@ -105,6 +108,7 @@ doc/doxygen/html/index.html: ${CPPSOURCES} ${PYSOURCES}
 	doxygen
 
 test: FORCE
+	./setup.py develop
 	./setup.py nosetests --attr '!known_failing'
 
 sloccount.sc: ${PYSOURCES} Makefile
diff --git a/README.md b/README.md
index 9d43a21..97f4863 100644
--- a/README.md
+++ b/README.md
@@ -2,10 +2,10 @@ screed -- short read sequence utils in Python.
 
 The official central repository for screed is:
 
-   https://github.com/ged-lab/screed
+   https://github.com/dib-lab/screed
 
 See http://readthedocs.org/docs/screed/en/latest/ for docs.
 
-Issues are tracked at https://github.com/ged-lab/khmer/issues.
+Issues are tracked at https://github.com/dib-lab/khmer/issues.
 
 [![Build Status](http://ci.ged.msu.edu/job/screed/badge/icon)](http://ci.ged.msu.edu/job/screed/)
diff --git a/bigtests/__init__.py b/bigtests/__init__.py
index 23c0523..c623fe9 100755
--- a/bigtests/__init__.py
+++ b/bigtests/__init__.py
@@ -32,8 +32,8 @@ def getfile(f):
     try:
         up = urllib.urlopen(base_url)
     except IOError:
-        raise IOError, "Error downloading testfiles, are you connected to " +\
-              "the internet?"
+        raise IOError("Error downloading testfiles, are you connected to " +\
+              "the internet?")
     fp.write(up.read())
     fp.close()
 
diff --git a/doc/CODE_OF_CONDUCT.txt b/doc/CODE_OF_CONDUCT.rst
similarity index 100%
rename from doc/CODE_OF_CONDUCT.txt
rename to doc/CODE_OF_CONDUCT.rst
diff --git a/doc/LICENSE.txt b/doc/LICENSE.rst
similarity index 100%
rename from doc/LICENSE.txt
rename to doc/LICENSE.rst
diff --git a/doc/conf.py b/doc/conf.py
index 196ec6d..e893f5a 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -31,7 +31,7 @@ extensions = []
 templates_path = ['_templates']
 
 # The suffix of source filenames.
-source_suffix = '.txt'
+source_suffix = '.rst'
 
 # The encoding of source files.
 #source_encoding = 'utf-8-sig'
@@ -188,7 +188,7 @@ htmlhelp_basename = 'screeddoc'
 html_context = {
         "google_analytics_id" : 'UA-51731094-1',
         "disqus_shortname" : 'screed-docs',
-        #   "github_base_account" : 'ged-lab',
+        #   "github_base_account" : 'dib-lab',
         "github_project" : 'screed',
 }
 
diff --git a/doc/dev/CODE_OF_CONDUCT.rst b/doc/dev/CODE_OF_CONDUCT.rst
deleted file mode 120000
index b52bf14..0000000
--- a/doc/dev/CODE_OF_CONDUCT.rst
+++ /dev/null
@@ -1 +0,0 @@
-../../CODE_OF_CONDUCT.rst
\ No newline at end of file
diff --git a/doc/dev/coding-guidelines-and-review.txt b/doc/dev/coding-guidelines-and-review.rst
similarity index 100%
rename from doc/dev/coding-guidelines-and-review.txt
rename to doc/dev/coding-guidelines-and-review.rst
diff --git a/doc/dev/index.txt b/doc/dev/index.rst
similarity index 100%
rename from doc/dev/index.txt
rename to doc/dev/index.rst
diff --git a/doc/dev/release-checklist.txt b/doc/dev/release-checklist.rst
similarity index 67%
rename from doc/dev/release-checklist.txt
rename to doc/dev/release-checklist.rst
index 479dbea..4ea4317 100644
--- a/doc/dev/release-checklist.txt
+++ b/doc/dev/release-checklist.rst
@@ -19,7 +19,7 @@ Getting Started
 #. Start with a clean checkout::
 
         cd `mktemp -d`
-        git clone git at github.com:ged-lab/screed.git
+        git clone git at github.com:dib-lab/screed.git
         cd screed
 
 #. Install/update versioneer::
@@ -55,7 +55,7 @@ Getting Started
    Tag the release candidate with the new version prefixed by the letter 'v'::
 
         git tag v${new_version}-${rc}
-        git push --tags git at github.com:ged-lab/screed.git
+        git push --tags git at github.com:dib-lab/screed.git
 
 #. Test the release candidate::
 
@@ -70,21 +70,20 @@ Getting Started
         source bin/activate
         pip install nose
         git clone --depth 1 --branch v${new_version}-${rc} \
-                https://github.com/ged-lab/screed.git
+                https://github.com/dib-lab/screed.git
         cd screed
         make install
         nosetests screed --attr '!known_failing'
         make test
-        python -c 'import screed; print screed.__version__' # double-check
-        version number
+        python -c 'import screed; print screed.__version__' # double-check version number
 
 
         # Test via pip
         cd ../../testenv2
         source bin/activate
         pip install nose
-        pip install -e
-        git+https://github.com/ged-lab/screed.git@v${new_version}-${rc}#egg=screed
+        pip install -e \
+                git+https://github.com/dib-lab/screed.git@v${new_version}-${rc}#egg=screed
         cd src/screed
         make dist
         make install
@@ -132,7 +131,49 @@ Getting Started
    for screed are to run the khmer automated tests with the new version of
    screed installed and then to run the khmer acceptance tests.
 
-#. Make sure any release notes are merged into doc/release-notes/.
+#. Make sure any release notes are merged into doc/release-notes/. Release
+   notes should be written in the `.md` format to satisfy the requirements for
+   GitHub release notes. The `convert-release-notes` make target can be used to 
+   generate `.rst` files from the `.md` notes.
+
+
+How to make a final release
+===========================
+
+When you have a thoroughly tested release candidate, cut a release like so:
+
+#. Create the final tag and publish the new release on PyPI (requires an
+   authorized account) ::
+
+       cd ../../../screed
+       git tag v${new_version}
+       python setup.py register sdist upload
+
+#. Delete the release candidate tag and push the tag updates to GitHub::
+
+       git tag -d v${new_version}-${rc}
+       git push git at github.com:dib-lab/screed.git
+       git push --tags git at github.com:dib-lab/screed.git
+
+#. Add the release on GitHub, using the tag you just pushed. Name it "Version
+   X.Y.Z" and copy/paste in the release notes.
+
+#. Update Read the Docs to point to the new version. Visit
+   https://readthedocs.org/builds/screed/ and ‘Build Version: master’ to pick up
+   the new tag. Once that build has finished check the “Activate” box next to
+   the new version at https://readthedocs.org/dashboard/screed/versions/ under
+   “Choose Active Versions”. Finally change the default version at
+   https://readthedocs.org/dashboard/screed/advanced/ to the new version.
+
+#. Delete any RC tags created:: 
+   
+       git tag -d v${new_version}-${rc}
+       git push origin :refs/tags/v${new_version}-${rc}
+
+#. Tweet about the new release
+
+#. Send email including the release notes to khmer at lists.idyll.org and
+   khmer-announce at lists.idyll.org
 
 Notes on this document
 ======================
diff --git a/doc/example.txt b/doc/example.rst
similarity index 100%
rename from doc/example.txt
rename to doc/example.rst
diff --git a/doc/index.txt b/doc/index.rst
similarity index 96%
rename from doc/index.txt
rename to doc/index.rst
index ae92ca9..0e6057e 100644
--- a/doc/index.txt
+++ b/doc/index.rst
@@ -21,7 +21,7 @@ Contents:
    
    dev/index
    release-notes/index
-   user/known-issues.txt
+   user/known-issues
    
    CODE_OF_CONDUCT
    LICENSE
diff --git a/doc/release-notes/RELEASE-0.5.txt b/doc/release-notes/RELEASE-0.5.rst
similarity index 100%
rename from doc/release-notes/RELEASE-0.5.txt
rename to doc/release-notes/RELEASE-0.5.rst
diff --git a/doc/release-notes/RELEASE-0.8.md b/doc/release-notes/RELEASE-0.8.md
index d4170fa..23c1498 100644
--- a/doc/release-notes/RELEASE-0.8.md
+++ b/doc/release-notes/RELEASE-0.8.md
@@ -15,7 +15,7 @@ http://screed.readthedocs.org/en/v0.8/
 This release successfully installs and passes its unit tests on
 Ubuntu 14.04 and the latest release of Mac OS X 10 "Yosemite". It
 also passes the khmer acceptance tests as per the [eelpond testing
-protocol.](https://github.com/ged-lab/literate-resting/blob/master/kp/README.txt)
+protocol.](https://github.com/dib-lab/literate-resting/blob/master/kp/README.txt)
 
 This release of screed has renamed the 'accuracy' attribute of read records to
 'quality;' this API change will need to be adopted by all users wanting to
@@ -23,23 +23,23 @@ upgrade to this version. Unlike the khmer project, Screed is not currently
 under semantic versioning. It will be with the 1.0 release.
 
  - Screed now has automatic compression detection via magic bit sniffing
- for gzip and bzip2 compressed files (from @mr-c in ged-lab/khmer#432)
+ for gzip and bzip2 compressed files (from @mr-c in dib-lab/khmer#432)
  - Screed now supports streaming of uncompressed FASTA and FASTQ formatted
  nucleotide sequence data. bzip2 compressed FASTA and FASTQ formatted
  nucleotide sequence data can also be streamed but not gzip compressed
  FASTA and FASTQ formatted nucleotide sequence data. (from @mr-c, see
- ged-lab/khmer#633)
+ dib-lab/khmer#633)
  - Screed now has a Changelog, developer documentation and a code of conduct
- (from @ctb, @mr-c, @bocajnotnef in ged-lab/khmer#625)
+ (from @ctb, @mr-c, @bocajnotnef in dib-lab/khmer#625)
  - Versions are now autogenerated using git tags via Versioneer (from
  @bocajnotnef in cadceb5)
  - Documentation is now autogenerated using Doxygen (from @mr-c in d8ed05b)
 
 ## Notable bugs fixed/issues closed:
- - A khmer script was not accepting reads on the stdin ged-lab/khmer#633
+ - A khmer script was not accepting reads on the stdin dib-lab/khmer#633
  by @mr-c
  - screed returning the wrong version and breaking dev installs
- ged-lab/khmer#803 by @mr-c
+ dib-lab/khmer#803 by @mr-c
 
 
 ## Known Issues
@@ -48,15 +48,15 @@ These are all pre-existing
 
  - Screed records cannot be sliced requiring un-Pythonic techniques to achieve
  the same behavior. This will be included in a future release. This is being
- tracked in ged-lab/khmer#768
+ tracked in dib-lab/khmer#768
  - Screed self-tests do not use a temporary directory which causes
  tests run from package-based installs to fail. This is being tracked in
- ged-lab/khmer#748
+ dib-lab/khmer#748
  - Screed does not support gzip file streaming. This is an issue with Python
  2.x and will likely *not* be fixed in future releases. This is being tracked
- in ged-lab/khmer#700
+ in dib-lab/khmer#700
  - Screed is overly tolerant of spaces in fast{a,q} which is against spec. This
- is being tracked in ged-lab/khmer#108
+ is being tracked in dib-lab/khmer#108
 
 ## Contributors
 
diff --git a/doc/release-notes/RELEASE-0.8.rst b/doc/release-notes/RELEASE-0.8.rst
new file mode 100644
index 0000000..d54a8ac
--- /dev/null
+++ b/doc/release-notes/RELEASE-0.8.rst
@@ -0,0 +1,70 @@
+Release v0.8
+============
+
+We are pleased to announce the release of Screed v0.8. Screed is a
+database engine capable of storing and retrieving short-read sequence
+data and is designed to be fast and adaptable to different sequence file
+formats.
+
+This version of Screed contains developer documentation for contributing
+to the Screed project and a code of conduct for interacting with other
+contributors and project maintainers. Documentation is available at
+http://screed.readthedocs.org/en/v0.8/
+
+New items of note:
+------------------
+
+This release successfully installs and passes its unit tests on Ubuntu
+14.04 and the latest release of Mac OS X 10 "Yosemite". It also passes
+the khmer acceptance tests as per the `eelpond testing
+protocol. <https://github.com/dib-lab/literate-resting/blob/master/kp/README.txt>`__
+
+This release of screed has renamed the 'accuracy' attribute of read
+records to 'quality;' this API change will need to be adopted by all
+users wanting to upgrade to this version. Unlike the khmer project,
+Screed is not currently under semantic versioning. It will be with the
+1.0 release.
+
+-  Screed now has automatic compression detection via magic bit sniffing
+   for gzip and bzip2 compressed files (from @mr-c in dib-lab/khmer#432)
+-  Screed now supports streaming of uncompressed FASTA and FASTQ
+   formatted nucleotide sequence data. bzip2 compressed FASTA and FASTQ
+   formatted nucleotide sequence data can also be streamed but not gzip
+   compressed FASTA and FASTQ formatted nucleotide sequence data. (from
+   @mr-c, see dib-lab/khmer#633)
+-  Screed now has a Changelog, developer documentation and a code of
+   conduct (from @ctb, @mr-c, @bocajnotnef in dib-lab/khmer#625)
+-  Versions are now autogenerated using git tags via Versioneer (from
+   @bocajnotnef in cadceb5)
+-  Documentation is now autogenerated using Doxygen (from @mr-c in
+   d8ed05b)
+
+Notable bugs fixed/issues closed:
+---------------------------------
+
+-  A khmer script was not accepting reads on the stdin dib-lab/khmer#633
+   by @mr-c
+-  screed returning the wrong version and breaking dev installs
+   dib-lab/khmer#803 by @mr-c
+
+Known Issues
+------------
+
+These are all pre-existing
+
+-  Screed records cannot be sliced requiring un-Pythonic techniques to
+   achieve the same behavior. This will be included in a future release.
+   This is being tracked in dib-lab/khmer#768
+-  Screed self-tests do not use a temporary directory which causes tests
+   run from package-based installs to fail. This is being tracked in
+   dib-lab/khmer#748
+-  Screed does not support gzip file streaming. This is an issue with
+   Python 2.x and will likely *not* be fixed in future releases. This is
+   being tracked in dib-lab/khmer#700
+-  Screed is overly tolerant of spaces in fast{a,q} which is against
+   spec. This is being tracked in dib-lab/khmer#108
+
+Contributors
+------------
+
+ at bocajnotnef @mr-c @brtaylor92 @wrightmhw @kdmurray91 @luizirber @ctb
diff --git a/doc/release-notes/RELEASE-0.9.md b/doc/release-notes/RELEASE-0.9.md
new file mode 100644
index 0000000..ffac7e6
--- /dev/null
+++ b/doc/release-notes/RELEASE-0.9.md
@@ -0,0 +1,41 @@
+
+# Release v0.9
+
+We are pleased to announce the release of Screed v0.9. Screed is a database
+engine capable of storing and retrieving short-read sequence data and is
+designed to be fast and adaptable to different sequence file formats.
+
+This version of Screed features Python 3 syntax with compatibility with Python 2. Additional changes have broken backwards compatibility in several small ways in preparation for our 1.0 release and adoption of strict semantic versioning from there on out.
+
+It is also the first release since our move to the University of California, Davis and also under our new name, the Lab for Data Intensive Biology.
+
+Documentation is available at http://screed.readthedocs.org/en/v0.9/
+
+## New items of note:
+
+- Now a primarily Python 3 codebase with Python 2 compatibility. https://github.com/dib-lab/screed/pull/41 @luizirber & @mr-c
+
+- Tests now correctly run using temporary directories and the test data is now shipped allowing the tests to be run after installation. https://github.com/dib-lab/screed/pull/30 @bocajnotnef https://github.com/dib-lab/screed/pull/40 @mr-c
+- The private method `screed/screedRecord._screed_record_dict()` has been renamed to `screed.screedRecord.Record()`. This is **not** a backwards compatible change. https://github.com/dib-lab/screed/pull/35 @sguermond
+- `screed.open()` now accepts `-` as a synonym for STDIN and is now an (optional) context manager. It no longer defaults to parsing out a separate description from the name. The description field will be removed altogether from the next release. This is **not** a backwards compatible change. https://github.com/dib-lab/screed/pull/36 @anotherthomas  https://github.com/dib-lab/screed/pull/39 https://github.com/dib-lab/screed/pull/41 @luizirber https://github.com/dib-lab/screed/pull/43 @ctb 
+- The FASTQ parser was improved and it no longer hangs in the presence of empty lines. https://github.com/dib-lab/screed/pull/38 @proteasome
+- Screed records now slice correctly https://github.com/dib-lab/screed/pull/41 @wrightmhw @luizirber 
+
+
+## Other bugs fixed/issues closed:
+
+- Release notes are now a part of the documentation. https://github.com/dib-lab/screed/pull/33 @bocajnotnef 
+- A test was made more robust to prevent hangs. https://github.com/dib-lab/screed/pull/37 @anotherthomas 
+
+## Known Issues
+
+These are all pre-existing
+
+ - Screed does not support gzip file streaming. This is an issue with Python 2.x and will likely *not* be fixed in future releases. This is being tracked in dib-lab/khmer#700
+ - Screed is overly tolerant of spaces in fast{a,q} which is against spec. This is being tracked in dib-lab/khmer#108
+ 
+## Contributors
+
+ at luizirber @mr-c @bocajnotnef @ctb \*@proteasome \*@anotherthomas \*@sguermond 
+
+\* Indicates new contributors
diff --git a/doc/release-notes/RELEASE-0.9.rst b/doc/release-notes/RELEASE-0.9.rst
new file mode 100644
index 0000000..61ed94b
--- /dev/null
+++ b/doc/release-notes/RELEASE-0.9.rst
@@ -0,0 +1,72 @@
+Release v0.9
+============
+
+We are pleased to announce the release of Screed v0.9. Screed is a
+database engine capable of storing and retrieving short-read sequence
+data and is designed to be fast and adaptable to different sequence file
+formats.
+
+This version of Screed features Python 3 syntax with compatibility with
+Python 2. Additional changes have broken backwards compatibility in
+several small ways in preparation for our 1.0 release and adoption of
+strict semantic versioning from there on out.
+
+It is also the first release since our move to the University of
+California, Davis and also under our new name, the Lab for Data Intensive
+Biology.
+
+Documentation is available at http://screed.readthedocs.org/en/v0.9/
+
+New items of note:
+------------------
+
+-  Now a primarily Python 3 codebase with Python 2 compatibility.
+   https://github.com/dib-lab/screed/pull/41 @luizirber & @mr-c
+
+-  Tests now correctly run using temporary directories and the test data
+   is now shipped allowing the tests to be run after installation.
+   https://github.com/dib-lab/screed/pull/30 @bocajnotnef
+   https://github.com/dib-lab/screed/pull/40 @mr-c
+-  The private method ``screed/screedRecord._screed_record_dict()`` has
+   been renamed to ``screed.screedRecord.Record()``. This is **not** a
+   backwards compatible change.
+   https://github.com/dib-lab/screed/pull/35 @sguermond
+-  ``screed.open()`` now accepts ``-`` as a synonym for STDIN and is now
+   an (optional) context manager. It no longer defaults to parsing out a
+   separate description from the name. The description field will be
+   removed altogether from the next release. This is **not** a backwards
+   compatible change. https://github.com/dib-lab/screed/pull/36
+   @anotherthomas https://github.com/dib-lab/screed/pull/39
+   https://github.com/dib-lab/screed/pull/41 @luizirber
+   https://github.com/dib-lab/screed/pull/43 @ctb
+-  The FASTQ parser was improved and it no longer hangs in the presence
+   of empty lines. https://github.com/dib-lab/screed/pull/38 @proteasome
+-  Screed records now slice correctly
+   https://github.com/dib-lab/screed/pull/41 @wrightmhw @luizirber
+
+Other bugs fixed/issues closed:
+-------------------------------
+
+-  Release notes are now a part of the documentation.
+   https://github.com/dib-lab/screed/pull/33 @bocajnotnef
+-  A test was made more robust to prevent hangs.
+   https://github.com/dib-lab/screed/pull/37 @anotherthomas
+
+Known Issues
+------------
+
+These are all pre-existing
+
+-  Screed does not support gzip file streaming. This is an issue with
+   Python 2.x and will likely *not* be fixed in future releases. This is
+   being tracked in dib-lab/khmer#700
+-  Screed is overly tolerant of spaces in fast{a,q} which is against
+   spec. This is being tracked in dib-lab/khmer#108
+
+Contributors
+------------
+
+ at luizirber @mr-c @bocajnotnef @ctb \*@proteasome \*@anotherthomas
+\*@sguermond
+
+\* Indicates new contributors
diff --git a/doc/release-notes/index.txt b/doc/release-notes/index.rst
similarity index 91%
rename from doc/release-notes/index.txt
rename to doc/release-notes/index.rst
index bcf699c..42f33eb 100644
--- a/doc/release-notes/index.txt
+++ b/doc/release-notes/index.rst
@@ -9,3 +9,4 @@ Contents:
    :maxdepth: 1
 
    RELEASE-0.5
+   RELEASE-0.8
diff --git a/doc/screed.html b/doc/screed.html
deleted file mode 100644
index 12439d1..0000000
--- a/doc/screed.html
+++ /dev/null
@@ -1,743 +0,0 @@
-<?xml version="1.0" encoding="utf-8" ?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Docutils 0.5: http://docutils.sourceforge.net/" />
-<title>screed - short read sequence database</title>
-<meta name="copyright" content="2008-2010 Michigan State University" />
-<meta name="authors" content="Alex Nolley  C. Titus Brown" />
-<style type="text/css">
-
-/*
-:Author: David Goodger (goodger at python.org)
-:Id: $Id: html4css1.css 5196 2007-06-03 20:25:28Z wiemann $
-:Copyright: This stylesheet has been placed in the public domain.
-
-Default cascading style sheet for the HTML output of Docutils.
-
-See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
-customize this style sheet.
-*/
-
-/* used to remove borders from tables and images */
-.borderless, table.borderless td, table.borderless th {
-  border: 0 }
-
-table.borderless td, table.borderless th {
-  /* Override padding for "table.docutils td" with "! important".
-     The right padding separates the table cells. */
-  padding: 0 0.5em 0 0 ! important }
-
-.first {
-  /* Override more specific margin styles with "! important". */
-  margin-top: 0 ! important }
-
-.last, .with-subtitle {
-  margin-bottom: 0 ! important }
-
-.hidden {
-  display: none }
-
-a.toc-backref {
-  text-decoration: none ;
-  color: black }
-
-blockquote.epigraph {
-  margin: 2em 5em ; }
-
-dl.docutils dd {
-  margin-bottom: 0.5em }
-
-/* Uncomment (and remove this text!) to get bold-faced definition list terms
-dl.docutils dt {
-  font-weight: bold }
-*/
-
-div.abstract {
-  margin: 2em 5em }
-
-div.abstract p.topic-title {
-  font-weight: bold ;
-  text-align: center }
-
-div.admonition, div.attention, div.caution, div.danger, div.error,
-div.hint, div.important, div.note, div.tip, div.warning {
-  margin: 2em ;
-  border: medium outset ;
-  padding: 1em }
-
-div.admonition p.admonition-title, div.hint p.admonition-title,
-div.important p.admonition-title, div.note p.admonition-title,
-div.tip p.admonition-title {
-  font-weight: bold ;
-  font-family: sans-serif }
-
-div.attention p.admonition-title, div.caution p.admonition-title,
-div.danger p.admonition-title, div.error p.admonition-title,
-div.warning p.admonition-title {
-  color: red ;
-  font-weight: bold ;
-  font-family: sans-serif }
-
-/* Uncomment (and remove this text!) to get reduced vertical space in
-   compound paragraphs.
-div.compound .compound-first, div.compound .compound-middle {
-  margin-bottom: 0.5em }
-
-div.compound .compound-last, div.compound .compound-middle {
-  margin-top: 0.5em }
-*/
-
-div.dedication {
-  margin: 2em 5em ;
-  text-align: center ;
-  font-style: italic }
-
-div.dedication p.topic-title {
-  font-weight: bold ;
-  font-style: normal }
-
-div.figure {
-  margin-left: 2em ;
-  margin-right: 2em }
-
-div.footer, div.header {
-  clear: both;
-  font-size: smaller }
-
-div.line-block {
-  display: block ;
-  margin-top: 1em ;
-  margin-bottom: 1em }
-
-div.line-block div.line-block {
-  margin-top: 0 ;
-  margin-bottom: 0 ;
-  margin-left: 1.5em }
-
-div.sidebar {
-  margin: 0 0 0.5em 1em ;
-  border: medium outset ;
-  padding: 1em ;
-  background-color: #ffffee ;
-  width: 40% ;
-  float: right ;
-  clear: right }
-
-div.sidebar p.rubric {
-  font-family: sans-serif ;
-  font-size: medium }
-
-div.system-messages {
-  margin: 5em }
-
-div.system-messages h1 {
-  color: red }
-
-div.system-message {
-  border: medium outset ;
-  padding: 1em }
-
-div.system-message p.system-message-title {
-  color: red ;
-  font-weight: bold }
-
-div.topic {
-  margin: 2em }
-
-h1.section-subtitle, h2.section-subtitle, h3.section-subtitle,
-h4.section-subtitle, h5.section-subtitle, h6.section-subtitle {
-  margin-top: 0.4em }
-
-h1.title {
-  text-align: center }
-
-h2.subtitle {
-  text-align: center }
-
-hr.docutils {
-  width: 75% }
-
-img.align-left {
-  clear: left }
-
-img.align-right {
-  clear: right }
-
-ol.simple, ul.simple {
-  margin-bottom: 1em }
-
-ol.arabic {
-  list-style: decimal }
-
-ol.loweralpha {
-  list-style: lower-alpha }
-
-ol.upperalpha {
-  list-style: upper-alpha }
-
-ol.lowerroman {
-  list-style: lower-roman }
-
-ol.upperroman {
-  list-style: upper-roman }
-
-p.attribution {
-  text-align: right ;
-  margin-left: 50% }
-
-p.caption {
-  font-style: italic }
-
-p.credits {
-  font-style: italic ;
-  font-size: smaller }
-
-p.label {
-  white-space: nowrap }
-
-p.rubric {
-  font-weight: bold ;
-  font-size: larger ;
-  color: maroon ;
-  text-align: center }
-
-p.sidebar-title {
-  font-family: sans-serif ;
-  font-weight: bold ;
-  font-size: larger }
-
-p.sidebar-subtitle {
-  font-family: sans-serif ;
-  font-weight: bold }
-
-p.topic-title {
-  font-weight: bold }
-
-pre.address {
-  margin-bottom: 0 ;
-  margin-top: 0 ;
-  font-family: serif ;
-  font-size: 100% }
-
-pre.literal-block, pre.doctest-block {
-  margin-left: 2em ;
-  margin-right: 2em }
-
-span.classifier {
-  font-family: sans-serif ;
-  font-style: oblique }
-
-span.classifier-delimiter {
-  font-family: sans-serif ;
-  font-weight: bold }
-
-span.interpreted {
-  font-family: sans-serif }
-
-span.option {
-  white-space: nowrap }
-
-span.pre {
-  white-space: pre }
-
-span.problematic {
-  color: red }
-
-span.section-subtitle {
-  /* font-size relative to parent (h1..h6 element) */
-  font-size: 80% }
-
-table.citation {
-  border-left: solid 1px gray;
-  margin-left: 1px }
-
-table.docinfo {
-  margin: 2em 4em }
-
-table.docutils {
-  margin-top: 0.5em ;
-  margin-bottom: 0.5em }
-
-table.footnote {
-  border-left: solid 1px black;
-  margin-left: 1px }
-
-table.docutils td, table.docutils th,
-table.docinfo td, table.docinfo th {
-  padding-left: 0.5em ;
-  padding-right: 0.5em ;
-  vertical-align: top }
-
-table.docutils th.field-name, table.docinfo th.docinfo-name {
-  font-weight: bold ;
-  text-align: left ;
-  white-space: nowrap ;
-  padding-left: 0 }
-
-h1 tt.docutils, h2 tt.docutils, h3 tt.docutils,
-h4 tt.docutils, h5 tt.docutils, h6 tt.docutils {
-  font-size: 100% }
-
-ul.auto-toc {
-  list-style-type: none }
-
-</style>
-</head>
-<body>
-<div class="document" id="screed-short-read-sequence-database">
-<h1 class="title">screed - short read sequence database</h1>
-<table class="docinfo" frame="void" rules="none">
-<col class="docinfo-name" />
-<col class="docinfo-content" />
-<tbody valign="top">
-<tr><th class="docinfo-name">Copyright:</th>
-<td>2008-2010 Michigan State University</td></tr>
-<tr><th class="docinfo-name">Authors:</th>
-<td>Alex Nolley
-<br />C. Titus Brown</td></tr>
-<tr><th class="docinfo-name">Contact:</th>
-<td><a class="first reference external" href="mailto:badmit@gmail.com">badmit@gmail.com</a>, <a class="last reference external" href="mailto:ctb@msu.edu">ctb@msu.edu</a></td></tr>
-<tr class="field"><th class="docinfo-name">License:</th><td class="field-body">BSD</td>
-</tr>
-</tbody>
-</table>
-<!-- contents: -->
-<div class="section" id="notes-on-this-document">
-<h1>Notes on this document</h1>
-<p>This is the default documentation for screed. Some doctests are included
-in the file 'example.txt'. The examples in this file are meant for humans
-only: they will not work in doctests.</p>
-</div>
-<div class="section" id="introduction">
-<h1>Introduction</h1>
-<p>screed is a database generation and querying package made to be used with gene
-sequences generated by Solexa machines, namely the FASTQ format, though FASTA
-is supported by default as well. Values
-such as sequence name, sequence description and the sequence itself can be
-retrieved from these databases.</p>
-</div>
-<div class="section" id="getting-going">
-<h1>Getting Going</h1>
-<p>The following software packages are required to run screed:</p>
-<ul class="simple">
-<li>Python 2.4 or newer</li>
-<li>nose (for testing)</li>
-</ul>
-<div class="section" id="downloading">
-<h2>Downloading</h2>
-<dl class="docutils">
-<dt>You will need git to download a copy from the public git repository:</dt>
-<dd>git clone git://github.com/acr/screed.git</dd>
-</dl>
-</div>
-<div class="section" id="installing">
-<h2>Installing</h2>
-<p>Assuming you have already downloaded the package, this is how to install:</p>
-<pre class="literal-block">
-$ python setup.py install
-</pre>
-<p>To run the optional tests type:</p>
-<pre class="literal-block">
-$ python -m screed.tests.__main__
-</pre>
-</div>
-</div>
-<div class="section" id="quick-start">
-<h1>Quick-Start</h1>
-<div class="section" id="creating-a-database-from-the-api">
-<h2>Creating a database from the API</h2>
-<p>From a Python prompt type:</p>
-<pre class="literal-block">
->>> import screed
->>> screed.read_fasta_sequences('screed/tests/test.fa')
-</pre>
-<p>That command just parsed the FASTA file 'screed/tests/test.fa' into a
-screed-database named 'screed/tests/test.fa_screed'. The screed database
-is independent from the text file it was derived from, so moving, renaming
-or deleting the 'screed/tests/test.fa' file will not affect
-screed's operation. To create a screed database from a FASTQ file the
-syntax is similar:</p>
-<pre class="literal-block">
->>> screed.read_fastq_sequences('screed/tests/test.fastq')
-</pre>
-</div>
-<div class="section" id="creating-a-database-from-a-script">
-<h2>Creating a database from a script</h2>
-<p>To create a screed db from a FASTQ file at the shell:</p>
-<pre class="literal-block">
-$ ./fqdbm screed/tests/test.fastq
-</pre>
-<p>Similarly, to create a screed db from a fasta file:</p>
-<pre class="literal-block">
-$ ./fadbm screed/tests/test.fa
-</pre>
-<p>Alternately, if the screed module is in your PATH:</p>
-<pre class="literal-block">
-$ python -m screed.fadbm <fasta file>
-$ python -m screed.fqdbm <fastq file>
-</pre>
-<p>where <fast* file> is the path to a sequence file.</p>
-<p>screed natively supports FASTA and FASTQ database creation. If you have a new
-sequence you want screed to work with, see the section below on Writing
-Custom Sequence Parsers.</p>
-</div>
-</div>
-<div class="section" id="reading-databases">
-<h1>Reading databases</h1>
-<p>The class ScreedDB is used to read screed databases, regardless of what file
-format they were derived from (FASTA/FASTQ/hava/etc..). One reader to
-rule them all!</p>
-<div class="section" id="opening">
-<h2>Opening</h2>
-<p>In the Python environment, import the ScreedDB class and load
-some databases:</p>
-<pre class="literal-block">
->>> from screed import ScreedDB
->>> fadb = ScreedDB('screed/tests/test.fa')
->>> fqdb = ScreedDB('screed/tests/test.fastq')
-</pre>
-<p>Notice how you didn't need to write the '_screed' at the end of the file names?
-screed automatically adds that to the file name if you didn't.</p>
-</div>
-<div class="section" id="dictionary-interface">
-<h2>Dictionary Interface</h2>
-<p>Since screed emulates a read-only dictionary interface, any methods that
-don't modify a dictionary are supported:</p>
-<pre class="literal-block">
->>> fadb.keys()
->>> fqdb.keys()
-</pre>
-<p>Each record in the database contains 'fields' such as name and sequence
-information. If the database was derived from a FASTQ file, quality and
-optional annotation strings are included. Conversely, FASTA-derived
-databases have a description field.</p>
-<p>To retrieve the names of records in the database:</p>
-<pre class="literal-block">
->>> names = fadb.keys()
-</pre>
-<p>Length of the databases are easily found:</p>
-<pre class="literal-block">
->>> print len(fadb)
-22
->>> print len(fqdb)
-125
-</pre>
-</div>
-<div class="section" id="retrieving-records">
-<h2>Retrieving Records</h2>
-<p>A record is the standard container unit in screed. Each has 'fields' that
-vary slightly depending on what kind of file the database was derived from.
-For instance, a FASTQ-derived screed database has an id, a name,
-a quality score and a sequence. A FASTA-derived screed database has an
-id, name, description and a sequence.</p>
-<p>Retrieving whole records:</p>
-<pre class="literal-block">
->>> records = []
->>> for record in fadb.itervalues():
->>>     records.append(record)
-</pre>
-<p>What is returned is a dictionary of fields. The names of fields
-are keys into this dictionary with the actual information as values.
-For example:</p>
-<pre class="literal-block">
->>> record = fadb[fadb.keys()[0]]
->>> index = record['id']
->>> name = record['name']
->>> description = record['description']
->>> sequence = record['sequence']
-</pre>
-<p>What this does is retrieve the first record object in the screed database,
-then retrieve the index, name, description and sequence from the record
-object using standard dictionary key -> value pairs.</p>
-</div>
-<div class="section" id="retrieving-partial-sequences-slicing">
-<h2>Retrieving Partial Sequences (slicing)</h2>
-<p>screed supports the concept of retrieving a 'slice' or a subset of a
-sequence string. The motivation is speed: if you have a database
-entry with a very long sequence string but only want a small portion
-of the string, it is faster to retrieve only the portion than to
-retrieve the entire string and then perform standard Python string
-slicing.</p>
-<p>By default, screed's FASTA database creator sets up the 'sequence'
-column to support slicing. For example, if you have an entry with
-name 'someSeq' which has a 10K long sequence, and you want a
-slice of the sequence spanning positions 4000 to 4080:</p>
-<pre class="literal-block">
->>> seq = db['someSeq'].sequence
->>> slice = seq[4000:4080]
-</pre>
-<p>This is much faster than say:</p>
-<pre class="literal-block">
->>> seq = str(db['someSeq'].sequence)
->>> slice = seq[4000:4080]
-</pre>
-<p>Because deep down, less information is being read off the disk.
-The str() method above causes the entire sequence to be retrieved
-as a string. Then Python slicing is done on the string 'seq' and
-the subset stored in 'slice'.</p>
-</div>
-<div class="section" id="retrieving-via-index">
-<h2>Retrieving Via Index</h2>
-<p>Sometimes you don't care what the name of a sequence is; you're only
-interested in its position in the database. In these cases, retrieval via
-index is the method you'll want to use:</p>
-<pre class="literal-block">
->>> record = fqdb.loadRecordByIndex(5)
-</pre>
-<p>An index is like an offset into the database. The order records were kept in
-the FASTA or FASTQ file determines the index in their resulting screed database.
-The first record in a sequence file will have an index of 0, the
-second, an index of 1 and so on.</p>
-</div>
-</div>
-<div class="section" id="writing-custom-sequence-parsers">
-<h1>Writing Custom Sequence Parsers</h1>
-<p>screed is built to be adaptable to new kinds of file sequence formats.
-Included with screed are parsers for handling FASTA and FASTQ sequence
-file types, though if you need screed to work with a new format,
-all you need to do is write a new parser.</p>
-<div class="section" id="field-roles">
-<h2>Field Roles</h2>
-<p>Each field in a screed database is assigned a role. These roles describe what
-kind of information is stored in their field. Right now there are only 4
-different roles in a screed database: the text role, the sliceable role,
-the indexed key role and the primary key role. All roles are defined in the
-file: screed/DBConstants.py</p>
-<p>The text role (DBConstants._STANDARD_TEXT) is the role most fields in a
-database will have. This role tells screed that the associated field is
-storing standard textual data. Nothing special.</p>
-<p>The sliceable role (DBConstants._SLICEABLE_TEXT) is a role that can be
-assigned to long sequence fields. screed's default FASTA parser defines
-the 'sequence' field with the sliceable role. When screed retrieves a field
-that has the sliceable role, it builds a special data structure that
-supports slicing into the text.</p>
-<p>The indexed key role (DBConstants._INDEXED_TEXT_KEY) is associated with
-exactly one of the fields in a screed database. In screed's FASTA and
-FASTQ parsers, this role is fulfilled by the 'name' field. This field
-is required because it is the field screed tells sqlite to index when
-creating the database and it is the field used for name look-ups when
-querying a screed database.</p>
-<p>The primary key role (DBConstants._PRIMARY_KEY_ROLE) is a role automatically
-associated with the 'id' field in each database. This field is always
-created with each screed database and always holds this role. You as a
-user of screed won't need to worry about this one.</p>
-</div>
-<div class="section" id="general-parsing-function-format">
-<h2>General Parsing Function Format</h2>
-<p>create_db is the function central to the creation of screed databases. This
-function accepts a file path, a tuple of field names and roles, and an
-iterator function. The file path describes where the screed database should
-go, the tuple contains the names of fields and their associated roles and
-the iterator function yields records in a dictionary format.</p>
-<p>This sub-section describes general steps for preparing and using screed with a
-custom sequence parser. Though they don't have to be, future sequence parsers
-should be located in the seqparse.py file for convenience.
-These steps will be described in the context of working from the Python shell.</p>
-<p>First import the create_db function:</p>
-<pre class="literal-block">
->>> from screed import create_db
-</pre>
-<p>The create_db class handles the formatting of screed databases and
-provides a simple interface for storing sequence data.</p>
-<p>Next the database fields and roles must be specified. The fields tell
-screed the names and order of the data fields inside each record. For instance,
-lets say our new sequence has types 'name', 'bar', and 'baz', all text. The
-tuple will be:</p>
-<pre class="literal-block">
->>> fields = (('name', DBConstants._INDEXED_TEXT_KEY),
-              ('bar', DBConstants._STANDARD_TEXT),
-              ('baz', DBConstants._STANDARD_TEXT))
-</pre>
-<p>Notice how 'name' is given the indexed key role and bar and baz are
-given text roles? If, for instance, you know 'baz' fields can be very long
-and you want to be able to retrieve slices of them, you could specify
-fields as:</p>
-<pre class="literal-block">
->>> fields = (('name', DBConstants._INDEXED_TEXT_KEY),
-              ('bar', DBConstants._STANDARD_TEXT),
-              ('baz', DBConstants._SLICEABLE_TEXT))
-</pre>
-<p>All screed databases come with an 'id' field, which is a sequential
-numbering order starting at 0 for the first record, 1 for the second, and
-so on. The names and number of the other fields are arbitrary with one
-restriction: one and only one of the fields must fulfill the indexed key role.</p>
-<p>Next, you need to setup an iterator function that will return records in
-a dictionary format. Have a look at the 'fastq_iter', 'fasta_iter', or
-'hava_iter' functions in the screed/fastq.py, screed/fasta.py, and
-screed/hava.py files, respectively for examples on how to write one of these.
-If you don't know what an iterator function is, the documentation on the
-Python website gives a good description:
-<a class="reference external" href="http://docs.python.org/library/stdtypes.html#iterator-types">http://docs.python.org/library/stdtypes.html#iterator-types</a>.</p>
-<p>Once the iterator function is written, it needs to be instantiated. In the
-context of the built-in parsing functions, this means opening a file and
-passing the file handle to the iterator function:</p>
-<pre class="literal-block">
->>> seqfile = open('path_to_seq_file', 'rb')
->>> iter_instance = myiter(seqfile)
-</pre>
-<p>Assuming that your iterator function is called 'myiter', this sets up an
-instance of it ready to use with create_db.</p>
-<p>Now the screed database is created with one command:</p>
-<pre class="literal-block">
->>> create_db('path_to_screed_db', fields, iter_instance)
-</pre>
-<p>If you want the screed database saved at 'path_to_screed_db'. If instead you
-want the screed database created in the same directory and with a
-similar file name as the sequence file, its OK to do this:</p>
-<pre class="literal-block">
->>> create_db('path_to_seq_file', fields, iter_instance)
-</pre>
-<p>create_db will just append '_screed' to the end of the file name and make
-a screed database at that file path so the original file won't be
-overwritten.</p>
-<dl class="docutils">
-<dt>When you're done the sequence file should be closed::</dt>
-<dd><pre class="first last doctest-block">
->>> seqfile.close()
-</pre>
-</dd>
-</dl>
-</div>
-<div class="section" id="using-the-built-in-sequence-iterator-functions">
-<h2>Using the Built-in Sequence Iterator Functions</h2>
-<p>This section shows how to use the 'fastq_iter' and 'fasta_iter' functions
-for returning records from a sequence file.</p>
-<p>These functions both take a file handle as the only argument and then return
-a dictionary for each record in the file containing names of fields and
-associated data. These functions are primarily used in conjunction with
-the db_create() function, but they can be useful by themselves.</p>
-<p>First, import the necessary module and open a text file containing sequences.
-For this example, the 'fastq_iter' function will be used:</p>
-<pre class="literal-block">
->>> import screed.fastq
->>> seqfile = open('path_to_seqfile', 'rb')
-</pre>
-<p>Now, the 'fastq_iter' can be instantiated and iterated over:</p>
-<pre class="literal-block">
->>> fq_instance = screed.fastq(seqfile)
->>> for record in fq_instance:
-...     print record.name
-</pre>
-<p>That will print the name of every sequence in the file. If instead you want
-to accumulate the sequences:</p>
-<pre class="literal-block">
->>> sequences = []
->>> for record in fq_instance:
-...     sequences.append(record.sequence)
-</pre>
-<p>These iterators are the core of screed's sequence modularity. If there is
-a new sequence format you want screed to work with, all it needs is its
-own iterator.</p>
-</div>
-<div class="section" id="error-checking-in-parsing-methods">
-<h2>Error checking in parsing methods</h2>
-<p>The existing FASTA/FASTQ parsing functions contain some error
-checking, such as making sure the file can be opened and checking correct
-data is being read. Though screed doesn't enforce this, it is strongly
-recommended to include error checking code in your parser. To remain
-non-specific to one file sequence type or another, the underlying screed
-library can't contain error checking code of this kind. If errors are not
-detected by the parsing function, they will be silently included into the
-database being built and could cause problems much later when trying to
-read from the database.</p>
-</div>
-</div>
-<div class="section" id="file-formats-as-understood-by-screed">
-<h1>File formats as understood by screed</h1>
-<p>While the screed database remains non-specific to file formats, the included
-FASTA and FASTQ parsers expect specific formats. These parsers attempt to
-handle the most common attributes of sequence files, though they can not
-support all features.</p>
-<div class="section" id="fastq">
-<h2>FASTQ</h2>
-<p>The FASTQ parsing function is read_fastq_sequences() and is located in the
-screed module.</p>
-<p>The first line in a record must begin with '@' and is
-followed by a record identifier (a name). An optional annotations string
-may be included after a space on the same line.</p>
-<p>The second line begins the sequence line(s) which may be line wrapped.
-screed defines no limit on the length of sequence lines and no length on
-how many sequence lines a record may contain.</p>
-<p>After the sequence line(s) comes a '+' character on a new line. Some
-FASTQ formats require the first line to be repeated after the '+'
-character, but since this adds no new information to the record,
-read_fastq_sequences() will ignore this if it is included.</p>
-<p>The quality line(s) is last. Like the sequence line(s) this may
-be line wrapped. read_fastq_sequences() will raise an exception if the
-quality and sequence strings are of unequal length. screed performs
-no checking for valid quality scores.</p>
-</div>
-<div class="section" id="fasta">
-<h2>FASTA</h2>
-<p>The FASTA parsing function is read_fasta_sequences() and is also located
-in the screed module.</p>
-<p>The first line in a record must begin with '>' and is followed with the
-sequence's name and an optional description. If the description is
-included, it is separated from the name with a space. Note that though
-the FASTA format doesn't require named records, screed does. Without a
-unique name, screed can't look up sequences by name.</p>
-<p>The second line begins the line(s) of sequence. Like the FASTQ parser,
-read_fasta_sequences() allows any number of lines of any length.</p>
-</div>
-</div>
-<div class="section" id="fasta-fastq-conversion">
-<h1>FASTA <-> FASTQ Conversion</h1>
-<p>As an extra nicety, screed can convert FASTA files to FASTQ and back again.</p>
-<div class="section" id="fasta-to-fastq">
-<h2>FASTA to FASTQ</h2>
-<p>The function used for this process is called 'ToFastq' and is located in the
-screed module. It takes the path to a screed database as the first argument
-and a path to the desired FASTQ file as the second argument. There is also
-a shell interface called ToFastq.py:</p>
-<pre class="literal-block">
-$ ./ToFastq.py <path to fasta db> <converted fastq file>
-</pre>
-<p>or:</p>
-<pre class="literal-block">
-$ python -m screed.ToFastq <path to fasta db> <converted fastq file>
-</pre>
-<p>if the screed module is in your PATH.</p>
-<p>The FASTA name attribute is directly dumped from the file. The sequence
-attribute is also dumped pretty much directly, but is line wrapped to 80
-characters if it is longer.</p>
-<p>Any description line in the FASTA database is stored as a FASTQ annotation
-string with no other interpretation done.</p>
-<p>Finally, as there is no quality or quality score in a FASTA file, a default
-one is generated. The generation of the quality follows the Sanger FASTQ
-conventions. The score is 1 (ASCII: '"') meaning a probability of about 75%
-that the read is incorrect (1 in 4 chance). This PHRED quality score is
-calculated from the Sanger format: Q = -10log(p) where p is the probability
-of an incorrect read. Obviously this is a very rough way of providing a
-quality score and it is only intended to fill in the requirements of a FASTQ
-file. Any application needing a true measurement of the quality should
-not rely on this automatic conversion.</p>
-</div>
-<div class="section" id="fastq-to-fasta">
-<h2>FASTQ to FASTA</h2>
-<p>The function used for this process is called 'toFasta' and is located in the
-screed module. It takes the path to a screed database as the first argument
-and a path to the desired FASTA file as the second argument. Like the ToFastq
-function before, there is a shell interface to ToFasta:</p>
-<pre class="literal-block">
-$ ./ToFasta.py <path to fastq db> <converted fasta file>
-</pre>
-<p>or:</p>
-<pre class="literal-block">
-$ python -m screed.ToFasta <path to fastq db> <converted fasta file>
-</pre>
-<p>if the screed module is in your PATH.</p>
-<p>As above, the name and sequence attributes are directly dumped from the FASTQ
-database to the FASTA file with the sequence line wrapping to 80 characters.</p>
-<p>If it exists, the FASTQ annotation tag is stored as the FASTA description tag.
-As there is no equivalent in FASTA, the FASTQ quality score is ignored.</p>
-<!-- Local Variables:
-mode: rst
-mode: outline-minor
-End: -->
-</div>
-</div>
-</div>
-</body>
-</html>
diff --git a/doc/screed.txt b/doc/screed.rst
similarity index 97%
rename from doc/screed.txt
rename to doc/screed.rst
index c897bff..a4a4c3f 100755
--- a/doc/screed.txt
+++ b/doc/screed.rst
@@ -26,23 +26,16 @@ The following software packages are required to run screed:
 * Python 2.4 or newer
 * nose (for testing)
 
-Downloading
------------
-
-You will need git to download a copy from the public git repository:
-
-    git clone git://github.com/ged-lab/screed.git
-
 Installing
-----------
+-----------
 
-Assuming you have already downloaded the package, this is how to install::
+Use pip to download and install Screed and its dependencies::
 
-    $ python setup.py install
+    pip install screed
 
 To run the optional tests type::
 
-    $ python -m screed.tests.__main__
+    nosetests screed --attr '!known_failing'
 
 Quick-Start
 ===========
@@ -53,8 +46,9 @@ Reading FASTA/FASTQ files
 At the Python prompt, type::
 
    >>> import screed
-   >>> for read in screed.open(filename):
-   ...   print read.name, read.sequence
+   >>> with screed.open(filename) as seqfile:
+   ...     for read in seqfile:
+   ...         print read.name, read.sequence
 
 Here, 'filename' can be a FASTA or FASTQ file, and can be
 uncompressed, gzipped, or bz2-zipped.
diff --git a/doc/user/known-issues.rst b/doc/user/known-issues.rst
new file mode 100644
index 0000000..80ee711
--- /dev/null
+++ b/doc/user/known-issues.rst
@@ -0,0 +1,18 @@
+.. vim: set filetype=rst
+
+============
+Known Issues
+============
+
+This document details the known issues in the current release of screed. All
+issues for screed are tracked at https://github.com/dib-lab/khmer/labels/screed
+
+List of known issues
+====================
+
+Screed does not support gzip file streaming. This is an issue
+with Python 2.x and will likely *not* be fixed in future
+releases. https://github.com/dib-lab/khmer/issues/700
+
+Screed is overly tolerant of spaces in fast{q,a} which is against
+spec. https://github.com/dib-lab/khmer/issues/108
diff --git a/doc/user/known-issues.txt b/doc/user/known-issues.txt
deleted file mode 100644
index 3fc90e5..0000000
--- a/doc/user/known-issues.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-.. vim: set filetype=rst
-
-============
-Known Issues
-============
-
-This document details the known issues in the current release of screed. All
-issues for screed are tracked at https://github.com/ged-lab/khmer/labels/screed
-
-List of known issues
-====================
-
-Screed does not support gzip file streaming. This is an issue
-with Python 2.x and will likely *not* be fixed in future
-releases. https://github.com/ged-lab/khmer/issues/700
-
-Screed is overly tolerant of spaces in fast{q,a} which is against
-spec. https://github.com/ged-lab/khmer/issues/108
-
-Screed records cannot be sliced requiring un-Pythonic techniques
-to achieve the same behavior This will be included in a future
-release. https://github.com/ged-lab/khmer/issues/768
-
-Screed self-tests do not use a temporary directory which causes tests run
-from package-based installs to fail https://github.com/ged-lab/khmer/issues/748
-
diff --git a/screed/__init__.py b/screed/__init__.py
index 2758da4..2432977 100755
--- a/screed/__init__.py
+++ b/screed/__init__.py
@@ -21,15 +21,19 @@ file into a screed database.
 Conversion between sequence file types is provided in the ToFastq and
 ToFasta functions
 """
-from openscreed import ScreedDB, open_writer
-from openscreed import open_reader as open
-from conversion import ToFastq
-from conversion import ToFasta
-from createscreed import create_db
-from seqparse import read_fastq_sequences
-from seqparse import read_fasta_sequences
-from dna import rc
-
-from _version import get_versions
+
+from __future__ import absolute_import
+
+from screed.openscreed import ScreedDB, open_writer
+from screed.openscreed import open_reader as open
+from screed.conversion import ToFastq
+from screed.conversion import ToFasta
+from screed.createscreed import create_db
+from screed.seqparse import read_fastq_sequences
+from screed.seqparse import read_fasta_sequences
+from screed.dna import rc
+from screed.screedRecord import Record
+
+from screed._version import get_versions
 __version__ = get_versions()['version']
 del get_versions
diff --git a/screed/_version.py b/screed/_version.py
index 9005698..574bc64 100644
--- a/screed/_version.py
+++ b/screed/_version.py
@@ -6,23 +6,23 @@
 # that just contains the computed version number.
 
 # This file is released into the public domain. Generated by
-# versioneer-0.13 (https://github.com/warner/python-versioneer)
+# versioneer-0.14 (https://github.com/warner/python-versioneer)
+
+import errno
+import os
+import re
+import subprocess
+import sys
 
 # these strings will be replaced by git during git-archive
-git_refnames = " (tag: v0.8)"
-git_full = "ff9083881f1a3ef507d4149acadf06266154f79a"
+git_refnames = " (HEAD -> master, tag: v0.9)"
+git_full = "1600a6c305deb0268d9b6066b728477b5a5a48b1"
 
 # these strings are filled in when 'setup.py versioneer' creates _version.py
 tag_prefix = "v"
 parentdir_prefix = "."
 versionfile_source = "screed/_version.py"
 
-import errno
-import os
-import re
-import subprocess
-import sys
-
 
 def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
     assert isinstance(commands, list)
@@ -47,7 +47,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
             print("unable to find command, tried %s" % (commands,))
         return None
     stdout = p.communicate()[0].strip()
-    if sys.version >= '3':
+    if sys.version_info[0] >= 3:
         stdout = stdout.decode()
     if p.returncode != 0:
         if verbose:
@@ -125,13 +125,59 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
                 print("picking %s" % r)
             return {"version": r,
                     "full": keywords["full"].strip()}
-    # no suitable tags, so we use the full revision id
+    # no suitable tags, so version is "0+unknown", but full hex is still there
     if verbose:
-        print("no suitable tags, using full revision id")
-    return {"version": keywords["full"].strip(),
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
             "full": keywords["full"].strip()}
 
 
+def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
+    # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens.
+
+    # dirty
+    dirty = git_describe.endswith("-dirty")
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+    dirty_suffix = ".dirty" if dirty else ""
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" not in git_describe:  # just HEX
+        return "0+untagged.g"+git_describe+dirty_suffix, dirty
+
+    # just TAG-NUM-gHEX
+    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+    if not mo:
+        # unparseable. Maybe git-describe is misbehaving?
+        return "0+unparseable"+dirty_suffix, dirty
+
+    # tag
+    full_tag = mo.group(1)
+    if not full_tag.startswith(tag_prefix):
+        if verbose:
+            fmt = "tag '%s' doesn't start with prefix '%s'"
+            print(fmt % (full_tag, tag_prefix))
+        return None, dirty
+    tag = full_tag[len(tag_prefix):]
+
+    # distance: number of commits since tag
+    distance = int(mo.group(2))
+
+    # commit: short hex revision ID
+    commit = mo.group(3)
+
+    # now build up version string, with post-release "local version
+    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
+    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
+    # can always test version.endswith(".dirty").
+    version = tag
+    if distance or dirty:
+        version += "+%d.g%s" % (distance, commit) + dirty_suffix
+
+    return version, dirty
+
+
 def git_versions_from_vcs(tag_prefix, root, verbose=False):
     # this runs 'git' from the root of the source tree. This only gets called
     # if the git-archive 'subst' keywords were *not* expanded, and
@@ -141,31 +187,33 @@ def git_versions_from_vcs(tag_prefix, root, verbose=False):
     if not os.path.exists(os.path.join(root, ".git")):
         if verbose:
             print("no .git in %s" % root)
-        return {}
+        return {}  # get_versions() will try next method
 
     GITS = ["git"]
     if sys.platform == "win32":
         GITS = ["git.cmd", "git.exe"]
-    stdout = run_command(GITS, ["describe", "--tags", "--dirty", "--always"],
+    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
+    # if there are no tags, this yields HEX[-dirty] (no NUM)
+    stdout = run_command(GITS, ["describe", "--tags", "--dirty",
+                                "--always", "--long"],
                          cwd=root)
+    # --long was added in git-1.5.5
     if stdout is None:
-        return {}
-    if not stdout.startswith(tag_prefix):
-        if verbose:
-            fmt = "tag '%s' doesn't start with prefix '%s'"
-            print(fmt % (stdout, tag_prefix))
-        return {}
-    tag = stdout[len(tag_prefix):]
+        return {}  # try next method
+    version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose)
+
+    # build "full", which is FULLHEX[.dirty]
     stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
     if stdout is None:
         return {}
     full = stdout.strip()
-    if tag.endswith("-dirty"):
-        full += "-dirty"
-    return {"version": tag, "full": full}
+    if dirty:
+        full += ".dirty"
+
+    return {"version": version, "full": full}
 
 
-def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
+def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False):
     # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
     # __file__, we can work backwards from there to the root. Some
     # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
@@ -181,7 +229,7 @@ def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
         # versionfile_source is the relative path from the top of the source
         # tree (where the .git directory might live) to this file. Invert
         # this to find the root from __file__.
-        for i in range(len(versionfile_source.split('/'))):
+        for i in versionfile_source.split('/'):
             root = os.path.dirname(root)
     except NameError:
         return default
diff --git a/screed/conversion.py b/screed/conversion.py
index adfe1a0..3ae21db 100644
--- a/screed/conversion.py
+++ b/screed/conversion.py
@@ -1,6 +1,7 @@
+from __future__ import absolute_import
 # Copyright (c) 2008-2010, Michigan State University
 
-from openscreed import ScreedDB
+from .openscreed import ScreedDB
 
 _MAXLINELEN = 80
 _null_quality = '\"'  # ASCII 34, e.g 75% chance of incorrect read
@@ -54,11 +55,11 @@ def ToFastq(dbFile, outputFile):
     db = ScreedDB(dbFile)
 
     for value in db.itervalues():
-        outFile.write('@%s %s\n%s\n+\n%s\n' % (value['name'],
-                                               GetComments(value),
-                                               linewrap(
-                                                   str(value['sequence'])),
-                                               GenerateQuality(value)))
+        line = '@%s %s\n%s\n+\n%s\n' % (value['name'],
+                                        GetComments(value),
+                                        linewrap(str(value['sequence'])),
+                                        GenerateQuality(value))
+        outFile.write(line.encode('UTF-8'))
     db.close()
     outFile.close()
 
@@ -72,8 +73,9 @@ def ToFasta(dbFile, outputFile):
     db = ScreedDB(dbFile)
 
     for value in db.itervalues():
-        outFile.write('>%s %s\n%s\n' % (value['name'], GetComments(value),
-                                        linewrap(str(value['sequence']))))
+        line = '>%s %s\n%s\n' % (value['name'], GetComments(value),
+                                 linewrap(str(value['sequence'])))
+        outFile.write(line.encode('UTF-8'))
 
     db.close()
     outFile.close()
diff --git a/screed/createscreed.py b/screed/createscreed.py
index 5e01334..d09447d 100644
--- a/screed/createscreed.py
+++ b/screed/createscreed.py
@@ -1,4 +1,5 @@
-import DBConstants
+from __future__ import absolute_import
+from . import DBConstants
 import os
 import sqlite3
 import itertools
diff --git a/screed/dna.py b/screed/dna.py
index 1052b8f..77d0dd8 100644
--- a/screed/dna.py
+++ b/screed/dna.py
@@ -1,5 +1,5 @@
-import string
 import array
+import string
 
 legal_dna = "ACGTN"
 
@@ -21,7 +21,7 @@ def reverse_complement(s):
     """
     Build reverse complement of 's'.
     """
-    s = string.upper(s)
+    s = s.upper()
     assert is_DNA(s), "Your sequence must be DNA!"
 
     r = reverse(s)
@@ -29,16 +29,20 @@ def reverse_complement(s):
 
     return rc
 
+
 rc = reverse_complement                 # alias 'rc' to 'reverse_complement'
 
-__complementTranslation = string.maketrans('ACTG', 'TGAC')
+try:
+    __complementTranslation = str.maketrans('ACTG', 'TGAC')
+except AttributeError:
+    __complementTranslation = string.maketrans('ACTG', 'TGAC')
 
 
 def complement(s):
     """
     Return complement of 's'.
     """
-    c = string.translate(s, __complementTranslation)
+    c = s.translate(__complementTranslation)
     return c
 
 
@@ -46,8 +50,6 @@ def reverse(s):
     """
     Return reverse of 's'.
     """
-    r = array.array('c', s)
-    r.reverse()
-    r = string.join(r, '')
+    r = "".join(reversed(s))
 
     return r
diff --git a/screed/dump_to_fasta.py b/screed/dump_to_fasta.py
index 586d982..aa58e51 100644
--- a/screed/dump_to_fasta.py
+++ b/screed/dump_to_fasta.py
@@ -2,6 +2,7 @@
 
 # Copyright (c) 2008-2010, Michigan State University
 
+from __future__ import print_function
 from screed import ToFasta
 import sys
 import os
@@ -9,14 +10,14 @@ import os
 # Shell interface to the ToFasta screed conversion function
 if __name__ == '__main__':
     if len(sys.argv) != 3:
-        print "Usage: %s <dbfilename> <outputfilename>" % sys.argv[0]
+        print("Usage: %s <dbfilename> <outputfilename>" % sys.argv[0])
         exit(1)
 
     dbFile = sys.argv[1]
     outputFile = sys.argv[2]
 
     if not os.path.isfile(dbFile):
-        print "No such file: %s" % dbFile
+        print("No such file: %s" % dbFile)
         exit(1)
     if os.path.isfile(outputFile):
         os.unlink(outputFile)
diff --git a/screed/dump_to_fastq.py b/screed/dump_to_fastq.py
index f5a4ef9..8551295 100644
--- a/screed/dump_to_fastq.py
+++ b/screed/dump_to_fastq.py
@@ -2,6 +2,7 @@
 
 # Copyright (c) 2008-2010, Michigan State University
 
+from __future__ import print_function
 from screed import ToFastq
 import sys
 import os
@@ -9,14 +10,14 @@ import os
 # Shell interface to the ToFastq screed conversion function
 if __name__ == '__main__':
     if len(sys.argv) != 3:
-        print "Usage: %s <dbfilename> <outputfilename>" % sys.argv[0]
+        print("Usage: %s <dbfilename> <outputfilename>" % sys.argv[0])
         exit(1)
 
     dbFile = sys.argv[1]
     outputFile = sys.argv[2]
 
     if not os.path.isfile(dbFile):
-        print "No such file: %s" % dbFile
+        print("No such file: %s" % dbFile)
         exit(1)
     if os.path.isfile(outputFile):
         os.unlink(outputFile)
diff --git a/screed/fadbm.py b/screed/fadbm.py
index 111b9d8..0c8db4a 100755
--- a/screed/fadbm.py
+++ b/screed/fadbm.py
@@ -16,4 +16,4 @@ if __name__ == "__main__":
     filename = sys.argv[1]
     read_fasta_sequences(filename)
 
-    print "Database saved in %s%s" % (sys.argv[1], DBConstants.fileExtension)
+    print("Database saved in %s%s" % (sys.argv[1], DBConstants.fileExtension))
diff --git a/screed/fasta.py b/screed/fasta.py
index df50769..07200ef 100644
--- a/screed/fasta.py
+++ b/screed/fasta.py
@@ -1,12 +1,15 @@
-import DBConstants
-from screedRecord import _screed_record_dict, _Writer
+from __future__ import absolute_import
+
+from . import DBConstants
+from .screedRecord import Record, _Writer
+from .utils import to_str
 
 FieldTypes = (('name', DBConstants._INDEXED_TEXT_KEY),
               ('description', DBConstants._STANDARD_TEXT),
               ('sequence', DBConstants._SLICEABLE_TEXT))
 
 
-def fasta_iter(handle, parse_description=True, line=None):
+def fasta_iter(handle, parse_description=False, line=None):
     """
     Iterator over the given FASTA file handle, returning records. handle
     is a handle to a file opened for reading
@@ -15,9 +18,9 @@ def fasta_iter(handle, parse_description=True, line=None):
         line = handle.readline()
 
     while line:
-        data = _screed_record_dict()
+        data = Record()
 
-        line = line.strip()
+        line = to_str(line.strip())
         if not line.startswith('>'):
             raise IOError("Bad FASTA format: no '>' at beginning of line")
 
@@ -36,10 +39,10 @@ def fasta_iter(handle, parse_description=True, line=None):
 
         # Collect sequence lines into a list
         sequenceList = []
-        line = handle.readline()
+        line = to_str(handle.readline())
         while line and not line.startswith('>'):
             sequenceList.append(line.strip())
-            line = handle.readline()
+            line = to_str(handle.readline())
 
         data['sequence'] = ''.join(sequenceList)
         yield data
diff --git a/screed/fastq.py b/screed/fastq.py
index b3e4361..b14bc6c 100644
--- a/screed/fastq.py
+++ b/screed/fastq.py
@@ -1,23 +1,27 @@
-import DBConstants
-from screedRecord import _screed_record_dict, _Writer
+from __future__ import absolute_import
+
+from . import DBConstants
+from .screedRecord import Record, _Writer
+from .utils import to_str
+
 FieldTypes = (('name', DBConstants._INDEXED_TEXT_KEY),
               ('annotations', DBConstants._STANDARD_TEXT),
               ('sequence', DBConstants._STANDARD_TEXT),
               ('quality', DBConstants._STANDARD_TEXT))
 
 
-def fastq_iter(handle, line=None, parse_description=True):
+def fastq_iter(handle, line=None, parse_description=False):
     """
     Iterator over the given FASTQ file handle returning records. handle
     is a handle to a file opened for reading
     """
     if line is None:
         line = handle.readline()
-    line = line.strip()
+    line = to_str(line.strip())
     while line:
-        data = _screed_record_dict()
+        data = Record()
 
-        if not line.startswith('@'):
+        if line and not line.startswith('@'):
             raise IOError("Bad FASTQ format: no '@' at beginning of line")
 
         # Try to grab the name and (optional) annotations
@@ -34,22 +38,22 @@ def fastq_iter(handle, line=None, parse_description=True):
 
         # Extract the sequence lines
         sequence = []
-        line = handle.readline().strip()
-        while not line.startswith('+') and not line.startswith('#'):
+        line = to_str(handle.readline().strip())
+        while line and not line.startswith('+') and not line.startswith('#'):
             sequence.append(line)
-            line = handle.readline().strip()
+            line = to_str(handle.readline().strip())
 
         data['sequence'] = ''.join(sequence)
 
         # Extract the quality lines
         quality = []
-        line = handle.readline().strip()
+        line = to_str(handle.readline().strip())
         seqlen = len(data['sequence'])
         aclen = 0
         while not line == '' and aclen < seqlen:
             quality.append(line)
             aclen += len(line)
-            line = handle.readline().strip()
+            line = to_str(handle.readline().strip())
 
         data['quality'] = ''.join(quality)
         if len(data['sequence']) != len(data['quality']):
diff --git a/screed/fqdbm.py b/screed/fqdbm.py
index 81b3581..4c3f4e7 100755
--- a/screed/fqdbm.py
+++ b/screed/fqdbm.py
@@ -2,9 +2,11 @@
 
 # Copyright (c) 2008-2010, Michigan State University
 
+from __future__ import absolute_import
+
 import sys
-from __init__ import read_fastq_sequences
-import DBConstants
+from screed import read_fastq_sequences
+from screed import DBConstants
 
 # A shell interface to the screed FQDBM database writing function
 if __name__ == "__main__":
@@ -16,5 +18,5 @@ if __name__ == "__main__":
     filename = sys.argv[1]
     read_fastq_sequences(filename)
 
-    print "Database saved in %s%s" % (sys.argv[1], DBConstants.fileExtension)
+    print("Database saved in %s%s" % (sys.argv[1], DBConstants.fileExtension))
     exit(0)
diff --git a/screed/hava.py b/screed/hava.py
index 18fc820..1d5a6f8 100644
--- a/screed/hava.py
+++ b/screed/hava.py
@@ -1,4 +1,7 @@
-import DBConstants
+from __future__ import absolute_import
+
+from . import DBConstants
+from .utils import to_str
 
 FieldTypes = (('hava', DBConstants._INDEXED_TEXT_KEY),
               ('quarzk', DBConstants._STANDARD_TEXT),
@@ -14,14 +17,14 @@ def hava_iter(handle):
     is a handle to a file opened for reading
     """
     data = {}
-    line = handle.readline().strip()
+    line = to_str(handle.readline().strip())
     while line:
         data['hava'] = line
-        data['quarzk'] = handle.readline().strip()
-        data['muchalo'] = handle.readline().strip()
-        data['fakours'] = handle.readline().strip()
-        data['selimizicka'] = handle.readline().strip()
-        data['marshoon'] = handle.readline().strip()
+        data['quarzk'] = to_str(handle.readline().strip())
+        data['muchalo'] = to_str(handle.readline().strip())
+        data['fakours'] = to_str(handle.readline().strip())
+        data['selimizicka'] = to_str(handle.readline().strip())
+        data['marshoon'] = to_str(handle.readline().strip())
 
-        line = handle.readline().strip()
+        line = to_str(handle.readline().strip())
         yield data
diff --git a/screed/openscreed.py b/screed/openscreed.py
index 0e3b64c..765ec44 100644
--- a/screed/openscreed.py
+++ b/screed/openscreed.py
@@ -1,17 +1,25 @@
+# Copyright (c) 2008-2015, Michigan State University
+"""Reader and writer for screed."""
+
+from __future__ import absolute_import
+
 import os
-import types
-import UserDict
+import io
+import sys
 import sqlite3
 import gzip
 import bz2file
-import StringIO
-import io
-import sys
+try:
+    from collections import MutableMapping
+except ImportError:
+    import UserDict
+    MutableMapping = UserDict.DictMixin
 
-import DBConstants
-import screedRecord
-from fastq import fastq_iter, FASTQ_Writer
-from fasta import fasta_iter, FASTA_Writer
+from . import DBConstants
+from . import screedRecord
+from .fastq import fastq_iter, FASTQ_Writer
+from .fasta import fasta_iter, FASTA_Writer
+from .utils import to_str
 
 
 def get_writer_class(read_iter):
@@ -27,57 +35,99 @@ def open_writer(inp_filename, outp_filename):
     return klass(outp_filename)
 
 
-def open_reader(filename, *args, **kwargs):
-    """
-    Make a best-effort guess as to how to open/parse the given sequence file.
+def _normalize_filename(filename):
+    """Map '-' to '/dev/stdin' to handle the usual shortcut."""
+    if filename == '-':
+        filename = '/dev/stdin'
+    return filename
+
+
+class Open(object):
+    def __init__(self, filename, *args, **kwargs):
+        self.sequencefile = None
+        self.iter_fn = self.open_reader(filename, *args, **kwargs)
+        if self.iter_fn:
+            self.__name__ = self.iter_fn.__name__
+
+    def open_reader(self, filename, *args, **kwargs):
+        """
+        Make a best-effort guess as to how to parse the given sequence file.
+
+        Handles '-' as shortcut for stdin.
+        Deals with .gz, .bz2, FASTA, and FASTQ records.
+        """
+        magic_dict = {
+            b"\x1f\x8b\x08": "gz",
+            b"\x42\x5a\x68": "bz2",
+            # "\x50\x4b\x03\x04": "zip"
+        }  # Inspired by http://stackoverflow.com/a/13044946/1585509
+        filename = _normalize_filename(filename)
+        bufferedfile = io.open(file=filename, mode='rb', buffering=8192)
+        num_bytes_to_peek = max(len(x) for x in magic_dict)
+        file_start = bufferedfile.peek(num_bytes_to_peek)
+        compression = None
+        for magic, ftype in magic_dict.items():
+            if file_start.startswith(magic):
+                compression = ftype
+                break
+        if compression is 'bz2':
+            sequencefile = bz2file.BZ2File(filename=bufferedfile)
+            peek = sequencefile.peek(1)
+        elif compression is 'gz':
+            if not bufferedfile.seekable():
+                bufferedfile.close()
+                raise ValueError("gziped data not streamable, pipe through zcat \
+                                first")
+            peek = gzip.GzipFile(filename=filename).read(1)
+            sequencefile = gzip.GzipFile(filename=filename)
+        else:
+            peek = bufferedfile.peek(1)
+            sequencefile = bufferedfile
+
+        iter_fn = None
+        try:
+            first_char = peek[0]
+        except IndexError as err:
+            return []  # empty file
 
-    Deals with .gz, FASTA, and FASTQ records.
-    """
-    magic_dict = {
-        "\x1f\x8b\x08": "gz",
-        "\x42\x5a\x68": "bz2",
-        # "\x50\x4b\x03\x04": "zip"
-    }  # Inspired by http://stackoverflow.com/a/13044946/1585509
-    bufferedfile = io.open(file=filename, mode='rb', buffering=8192)
-    num_bytes_to_peek = max(len(x) for x in magic_dict)
-    file_start = bufferedfile.peek(num_bytes_to_peek)
-    compression = None
-    for magic, ftype in magic_dict.items():
-        if file_start.startswith(magic):
-            compression = ftype
-            break
-    if compression is 'bz2':
-        sequencefile = bz2file.BZ2File(filename=bufferedfile)
-        peek = sequencefile.peek(1)
-    elif compression is 'gz':
-        if not bufferedfile.seekable():
-            raise ValueError("gziped data not streamable, pipe through zcat \
-                             first")
-        peek = gzip.GzipFile(filename=filename).read(1)
-        sequencefile = gzip.GzipFile(filename=filename)
-    else:
-        peek = bufferedfile.peek(1)
-        sequencefile = bufferedfile
-
-    iter_fn = None
-    try:
-        if peek[0] == '>':
+        try:
+            first_char = chr(first_char)
+        except TypeError:
+            pass
+
+        if first_char == '>':
             iter_fn = fasta_iter
-        elif peek[0] == '@':
+        elif first_char == '@':
             iter_fn = fastq_iter
-    except IndexError as err:
-        return []  # empty file
 
-    if iter_fn is None:
-        raise ValueError("unknown file format for '%s'" % filename)
+        if iter_fn is None:
+            raise ValueError("unknown file format for '%s'" % filename)
+
+        self.sequencefile = sequencefile
+        return iter_fn(sequencefile, *args, **kwargs)
+
+    def __enter__(self):
+        return self.iter_fn
+
+    def __exit__(self, *exc_info):
+        self.close()
+
+    def __iter__(self):
+        if self.iter_fn:
+            return self.iter_fn
+        return iter(())
+
+    def close(self):
+        if self.sequencefile is not None:
+            self.sequencefile.close()
 
-    return iter_fn(sequencefile, *args, **kwargs)
 
 _open = open
-open = open_reader
+open = Open
+open_reader = open
 
 
-class ScreedDB(object, UserDict.DictMixin):
+class ScreedDB(MutableMapping):
 
     """
     Core on-disk dictionary interface for reading screed databases. Accepts a
@@ -115,7 +165,7 @@ class ScreedDB(object, UserDict.DictMixin):
                             % self._filepath)
 
         nothing = res.fetchone()
-        if not isinstance(nothing, type(None)):
+        if type(nothing) is not type(None):
             self._db.close()
             raise TypeError("Database %s has too many tables." % filename)
 
@@ -165,7 +215,7 @@ class ScreedDB(object, UserDict.DictMixin):
                                                   DBConstants._DICT_TABLE,
                                                   self._queryBy)
         res = cursor.execute(query, (key,))
-        if isinstance(res.fetchone(), type(None)):
+        if type(res.fetchone()) is type(None):
             raise KeyError("Key %s not found" % key)
         return screedRecord._buildRecord(self.fields, self._db,
                                          key,
@@ -194,7 +244,7 @@ class ScreedDB(object, UserDict.DictMixin):
                                                   DBConstants._DICT_TABLE,
                                                   DBConstants._PRIMARY_KEY)
         res = cursor.execute(query, (index,))
-        if isinstance(res.fetchone(), type(None)):
+        if type(res.fetchone()) is type(None):
             raise KeyError("Index %d not found" % index)
         return screedRecord._buildRecord(self.fields, self._db,
                                          index,
@@ -223,7 +273,7 @@ class ScreedDB(object, UserDict.DictMixin):
         """
         Iterator over records in the database
         """
-        for index in xrange(1, self.__len__() + 1):
+        for index in range(1, self.__len__() + 1):
             yield screedRecord._buildRecord(self.fields, self._db,
                                             index,
                                             DBConstants._PRIMARY_KEY)
@@ -238,6 +288,9 @@ class ScreedDB(object, UserDict.DictMixin):
         for key, in cursor.execute(query):
             yield key
 
+    def __iter__(self):
+        return self.iterkeys()
+
     def iteritems(self):
         """
         Iterator returning a (index, record) pairs
@@ -274,34 +327,40 @@ class ScreedDB(object, UserDict.DictMixin):
         """
         Not implemented (Read-only database)
         """
-        raise AttributeError
+        raise NotImplementedError
+
+    def __delitem__(self, something):
+        """
+        Not implemented (Read-only database)
+        """
+        raise NotImplementedError
 
     def clear(self):
         """
         Not implemented (Read-only database)
         """
-        raise AttributeError
+        raise NotImplementedError
 
     def update(self, something):
         """
         Not implemented (Read-only database)
         """
-        raise AttributeError
+        raise NotImplementedError
 
     def setdefault(self, something):
         """
         Not implemented (Read-only database)
         """
-        raise AttributeError
+        raise NotImplementedError
 
     def pop(self):
         """
         Not implemented (Read-only database)
         """
-        raise AttributeError
+        raise NotImplementedError
 
     def popitem(self):
         """
         Not implemented (Read-only database)
         """
-        raise AttributeError
+        raise NotImplementedError
diff --git a/screed/pygr_api.py b/screed/pygr_api.py
index 1fc675d..383f332 100644
--- a/screed/pygr_api.py
+++ b/screed/pygr_api.py
@@ -154,7 +154,7 @@ class _ScreedSeqInfoDict_ByIndex(object, UserDict.DictMixin):
         return _ScreedSequenceInfo(k, v)
 
     def keys(self):
-        return xrange(0, len(self.sdb))
+        return range(0, len(self.sdb))
 
     def iterkeys(self):
         i = 0
@@ -171,8 +171,8 @@ if __name__ == '__main__':
 
     db = ScreedSequenceDB(filename)
     for k in db:
-        print k, repr(db[k]), db[k].name
+        print(k, repr(db[k]), db[k].name)
 
     db = ScreedSequenceDB_ByIndex(filename)
     for k in db:
-        print k, repr(db[k]), db[k].name
+        print(k, repr(db[k]), db[k].name)
diff --git a/screed/screedRecord.py b/screed/screedRecord.py
index a811b6f..ce34a7f 100644
--- a/screed/screedRecord.py
+++ b/screed/screedRecord.py
@@ -1,12 +1,17 @@
-import UserDict
+from __future__ import absolute_import
 import types
-import DBConstants
+from . import DBConstants
 import gzip
 import bz2
 
+try:
+    from collections import MutableMapping
+except ImportError:
+    import UserDict
+    MutableMapping = UserDict.DictMixin
 
-class _screed_record_dict(UserDict.DictMixin):
 
+class Record(MutableMapping):
     """
     Simple dict-like record interface with bag behavior.
     """
@@ -14,9 +19,6 @@ class _screed_record_dict(UserDict.DictMixin):
     def __init__(self, *args, **kwargs):
         self.d = dict(*args, **kwargs)
 
-    def __getitem__(self, name):
-        return self.d[name]
-
     def __setitem__(self, name, value):
         self.d[name] = value
 
@@ -32,6 +34,24 @@ class _screed_record_dict(UserDict.DictMixin):
     def keys(self):
         return self.d.keys()
 
+    def __getitem__(self, idx):
+        if isinstance(idx, slice):
+            trimmed = Record(self.d)
+            trimmed['sequence'] = trimmed['sequence'][idx]
+            if 'quality' in trimmed:
+                trimmed['quality'] = trimmed['quality'][idx]
+            return Record(trimmed)
+        return self.d[idx]
+
+    def __delitem__(self, key):
+        del self.d[key]
+
+    def __iter__(self):
+        return iter(self.d)
+
+    def __repr__(self):
+        return repr(self.d)
+
 
 class _screed_attr(object):
 
@@ -181,7 +201,7 @@ def _buildRecord(fieldTuple, dbObj, rowName, queryBy):
         else:
             hackedResult.append((key, value))
 
-    return _screed_record_dict(hackedResult)
+    return Record(hackedResult)
 
 
 class _Writer(object):
diff --git a/screed/seqparse.py b/screed/seqparse.py
index 7276ff6..77f8bd7 100644
--- a/screed/seqparse.py
+++ b/screed/seqparse.py
@@ -6,12 +6,16 @@ functionality to arbitrary sequence formats. An example 'hava'
 parser is included for API reference
 """
 
+from __future__ import absolute_import
+
 import os
-from createscreed import create_db
-from openscreed import ScreedDB
-import fastq
-import fasta
-import hava
+
+from .createscreed import create_db
+from .openscreed import ScreedDB
+from . import openscreed
+from . import fastq
+from . import fasta
+from . import hava
 
 # [AN] these functions look strangely similar
 
@@ -20,10 +24,8 @@ def read_fastq_sequences(filename):
     """
     Function to parse text from the given FASTQ file into a screed database
     """
-    import openscreed
-
     # Will raise an exception if the file doesn't exist
-    iterfunc = openscreed.open(filename)
+    iterfunc = openscreed.open(filename, parse_description=True)
 
     # Create the screed db
     create_db(filename, fastq.FieldTypes, iterfunc)
@@ -35,10 +37,8 @@ def read_fasta_sequences(filename):
     """
     Function to parse text from the given FASTA file into a screed database
     """
-    import openscreed
-
     # Will raise an exception if the file doesn't exist
-    iterfunc = openscreed.open(filename)
+    iterfunc = openscreed.open(filename, parse_description=True)
 
     # Create the screed db
     create_db(filename, fasta.FieldTypes, iterfunc)
diff --git a/screed/tests/havaGen.py b/screed/tests/havaGen.py
index 7b431c1..d58d402 100755
--- a/screed/tests/havaGen.py
+++ b/screed/tests/havaGen.py
@@ -17,6 +17,7 @@ the nosetests.
 This is a work of fiction. Names are the product of the author's imagination
 and any resemblance to real life is entirely coincidental.
 """
+from __future__ import print_function
 
 import sys
 import os
@@ -90,7 +91,7 @@ def createHavaFiles(filename, size, divisions):
 
 if __name__ == '__main__':
     if len(sys.argv) != 4:
-        print "Usage: <filename> <size> <divisions>"
+        print("Usage: <filename> <size> <divisions>")
         exit(1)
 
     filename = sys.argv[1]
diff --git a/screed/tests/screed_tst_utils.py b/screed/tests/screed_tst_utils.py
new file mode 100644
index 0000000..f008d93
--- /dev/null
+++ b/screed/tests/screed_tst_utils.py
@@ -0,0 +1,49 @@
+#
+# This file is part of screed, http://github.com/dib-lab/screed/, and is
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see doc/LICENSE.txt.
+# Contact: khmer-project@idyll.org
+#
+# This file has been modified from the khmer project at
+# https://github.com/dib-lab/khmer/blob/a8356b7abbebf8540c7656378b1459442b781f87/tests/khmer_tst_utils.py
+#
+
+import tempfile
+import os
+import shutil
+from pkg_resources import Requirement, resource_filename, ResolutionError
+from io import StringIO
+import nose
+import sys
+import traceback
+
+
+def get_test_data(filename):
+    filepath = None
+    try:
+        filepath = resource_filename(
+            Requirement.parse("screed"), "screed/tests/" + filename)
+    except ResolutionError:
+        pass
+    if not filepath or not os.path.isfile(filepath):
+        filepath = os.path.join(os.path.dirname(__file__), 'test-data',
+                                filename)
+    return filepath
+
+cleanup_list = []
+
+
+def get_temp_filename(filename, tempdir=None):
+    if tempdir is None:
+        tempdir = tempfile.mkdtemp(prefix='screedtest_')
+        cleanup_list.append(tempdir)
+
+    return os.path.join(tempdir, filename)
+
+
+def cleanup():
+    global cleanup_list
+
+    for path in cleanup_list:
+        shutil.rmtree(path, ignore_errors=True)
+    cleanup_list = []
diff --git a/screed/tests/empty.fa b/screed/tests/test-data/empty.fa
similarity index 100%
rename from screed/tests/empty.fa
rename to screed/tests/test-data/empty.fa
diff --git a/screed/tests/test-whitespace.fa b/screed/tests/test-data/test-whitespace.fa
similarity index 100%
rename from screed/tests/test-whitespace.fa
rename to screed/tests/test-data/test-whitespace.fa
diff --git a/screed/tests/test.fa b/screed/tests/test-data/test.fa
similarity index 100%
rename from screed/tests/test.fa
rename to screed/tests/test-data/test.fa
diff --git a/screed/tests/test.fa.bz2 b/screed/tests/test-data/test.fa.bz2
similarity index 100%
rename from screed/tests/test.fa.bz2
rename to screed/tests/test-data/test.fa.bz2
diff --git a/screed/tests/test.fa.gz b/screed/tests/test-data/test.fa.gz
similarity index 100%
rename from screed/tests/test.fa.gz
rename to screed/tests/test-data/test.fa.gz
diff --git a/screed/tests/test.fa.zip b/screed/tests/test-data/test.fa.zip
similarity index 100%
rename from screed/tests/test.fa.zip
rename to screed/tests/test-data/test.fa.zip
diff --git a/screed/tests/test.fastq b/screed/tests/test-data/test.fastq
similarity index 100%
rename from screed/tests/test.fastq
rename to screed/tests/test-data/test.fastq
diff --git a/screed/tests/test.fastq.bz2 b/screed/tests/test-data/test.fastq.bz2
similarity index 100%
rename from screed/tests/test.fastq.bz2
rename to screed/tests/test-data/test.fastq.bz2
diff --git a/screed/tests/test.fastq.gz b/screed/tests/test-data/test.fastq.gz
similarity index 100%
rename from screed/tests/test.fastq.gz
rename to screed/tests/test-data/test.fastq.gz
diff --git a/screed/tests/test.hava b/screed/tests/test-data/test.hava
similarity index 100%
rename from screed/tests/test.hava
rename to screed/tests/test-data/test.hava
diff --git a/screed/tests/test_convert.py b/screed/tests/test_convert.py
index 432b3a1..130e84e 100644
--- a/screed/tests/test_convert.py
+++ b/screed/tests/test_convert.py
@@ -1,7 +1,10 @@
-import test_fasta
+from __future__ import absolute_import
+from . import test_fasta
 import os
 import screed
 from screed.DBConstants import fileExtension
+from . import screed_tst_utils as utils
+import shutil
 
 
 class Test_fasta_to_fastq(test_fasta.Test_fasta):
@@ -13,10 +16,11 @@ class Test_fasta_to_fastq(test_fasta.Test_fasta):
     """
 
     def setup(self):
-        thisdir = os.path.dirname(__file__)
-        self._fqName = os.path.join(thisdir, 'fa_to_fq')
-        self._faName = os.path.join(thisdir, 'fq_to_fa')
-        self._testfa = os.path.join(thisdir, 'test.fa')
+
+        self._fqName = utils.get_temp_filename('fa_to_fq')
+        self._faName = utils.get_temp_filename('fq_to_fa')
+        self._testfa = utils.get_temp_filename('test.fa')
+        shutil.copy(utils.get_test_data('test.fa'), self._testfa)
 
         screed.read_fasta_sequences(self._testfa)
         screed.ToFastq(self._testfa, self._fqName)  # Fasta db -> fasta text
diff --git a/screed/tests/test_dictionary.py b/screed/tests/test_dictionary.py
index 79b19fe..12d6a45 100644
--- a/screed/tests/test_dictionary.py
+++ b/screed/tests/test_dictionary.py
@@ -1,6 +1,9 @@
+from __future__ import absolute_import
 import os
 import screed
 from screed.DBConstants import fileExtension
+from . import screed_tst_utils as utils
+import shutil
 
 
 class Test_dict_methods(object):
@@ -11,7 +14,9 @@ class Test_dict_methods(object):
     """
 
     def setup(self):
-        self._testfa = os.path.join(os.path.dirname(__file__), 'test.fa')
+        self._testfa = utils.get_temp_filename('test.fa')
+        shutil.copy(utils.get_test_data('test.fa'), self._testfa)
+
         screed.read_fasta_sequences(self._testfa)
         self.db = screed.ScreedDB(self._testfa)
 
@@ -22,15 +27,15 @@ class Test_dict_methods(object):
         db = self.db
         keys = db.keys()
         ikeys = list(db.iterkeys())
-        assert sorted(keys) == sorted(ikeys)
+        assert all(key in ikeys for key in keys)
 
         values = db.values()
         ivalues = list(db.itervalues())
-        assert sorted(values) == sorted(ivalues)
+        assert all(value in ivalues for value in values)
 
         items = db.items()
         iitems = list(db.iteritems())
-        assert sorted(items) == sorted(iitems)
+        assert all(item in iitems for item in items)
 
     def test_contains(self):
         for k in self.db:
@@ -69,29 +74,29 @@ class Test_dict_methods(object):
         try:
             db.clear()
             assert 0
-        except AttributeError:
+        except NotImplementedError:
             pass
 
         try:
             db.update({})
             assert 0
-        except AttributeError:
+        except NotImplementedError:
             pass
 
         try:
             db.setdefault(None)
             assert 0
-        except AttributeError:
+        except NotImplementedError:
             pass
 
         try:
             db.pop()
             assert 0
-        except AttributeError:
+        except NotImplementedError:
             pass
 
         try:
             db.popitem()
             assert 0
-        except AttributeError:
+        except NotImplementedError:
             pass
diff --git a/screed/tests/test_fasta.py b/screed/tests/test_fasta.py
index d3bb45d..20d67fc 100644
--- a/screed/tests/test_fasta.py
+++ b/screed/tests/test_fasta.py
@@ -1,7 +1,10 @@
+from __future__ import absolute_import, unicode_literals
 import screed
 from screed.DBConstants import fileExtension
 import os
-from cStringIO import StringIO
+from io import StringIO
+from . import screed_tst_utils as utils
+import shutil
 
 
 def test_new_record():
@@ -19,7 +22,9 @@ def test_new_record():
 class Test_fasta(object):
 
     def setup(self):
-        self._testfa = os.path.join(os.path.dirname(__file__), 'test.fa')
+        self._testfa = utils.get_temp_filename('test.fa')
+        shutil.copy(utils.get_test_data('test.fa'), self._testfa)
+
         screed.read_fasta_sequences(self._testfa)
         self.db = screed.ScreedDB(self._testfa)
 
@@ -39,7 +44,7 @@ class Test_fasta(object):
             intRcrd = self.db.loadRecordByIndex(record.id)
             assert record == intRcrd
 
-    def test_length(self):
+    def test_length_2(self):
         read = self.db[self.db.keys()[0]]
 
         assert len(read) == len(read.sequence)
@@ -88,7 +93,7 @@ class Test_fasta(object):
             entries.append(self.db[entry])
 
         ivalues = list(self.db.itervalues())
-        assert sorted(entries) == sorted(ivalues)
+        assert all(entry in ivalues for entry in entries)
 
     def test_iteri(self):
         for id, entry in self.db.iteritems():
@@ -99,8 +104,9 @@ class Test_fasta(object):
 class Test_fasta_whitespace(object):
 
     def setup(self):
-        self._testfa = os.path.join(os.path.dirname(__file__),
-                                    'test-whitespace.fa')
+        self._testfa = utils.get_temp_filename('test-whitespace.fa')
+        shutil.copy(utils.get_test_data('test-whitespace.fa'), self._testfa)
+
         screed.read_fasta_sequences(self._testfa)
         self.db = screed.ScreedDB(self._testfa)
 
@@ -145,3 +151,27 @@ def test_writer_2():
     w.consume(read_iter)
 
     assert fp.getvalue() == '>foo bar\nATCG\n'
+
+
+def test_fasta_slicing():
+    testfa = utils.get_temp_filename('test.fa')
+    shutil.copy(utils.get_test_data('test.fa'), testfa)
+
+    with screed.open(testfa) as sequences:
+        record = next(sequences)
+
+    trimmed = record[:10]
+    assert trimmed['sequence'] == "TGCAGAAAAT"
+
+    for s in (slice(5, 10), slice(2, 26), slice(5, -1, 2),
+              slice(-2, -10, 1), slice(-1, 5, 2), slice(5)):
+        trimmed = record[s]
+
+        assert trimmed['name'] == record['name']
+        assert trimmed.name == record.name
+
+        assert trimmed['description'] == record['description']
+        assert trimmed.description == record.description
+
+        assert trimmed['sequence'] == record['sequence'][s]
+        assert trimmed.sequence == record.sequence[s]
diff --git a/screed/tests/test_fasta_recover.py b/screed/tests/test_fasta_recover.py
index b73327a..a00bbf5 100644
--- a/screed/tests/test_fasta_recover.py
+++ b/screed/tests/test_fasta_recover.py
@@ -1,15 +1,20 @@
-import test_fasta
+from __future__ import absolute_import
+from . import test_fasta
 import os
 import screed
 from screed.DBConstants import fileExtension
+from . import screed_tst_utils as utils
+import shutil
 
 
 class test_fa_recover(test_fasta.Test_fasta):
 
     def setup(self):
-        self._fileName = os.path.join(
-            os.path.dirname(__file__), 'fastaRecovery')
-        self._testfa = os.path.join(os.path.dirname(__file__), 'test.fa')
+        self._fileName = utils.get_temp_filename('fastaRecovery')
+
+        self._testfa = utils.get_temp_filename('test.fa')
+        shutil.copy(utils.get_test_data('test.fa'), self._testfa)
+
         screed.read_fasta_sequences(self._testfa)
         screed.ToFasta(self._testfa, self._fileName)
         screed.read_fasta_sequences(self._fileName)
diff --git a/screed/tests/test_fastq.py b/screed/tests/test_fastq.py
index c95dfa2..b84d7f6 100644
--- a/screed/tests/test_fastq.py
+++ b/screed/tests/test_fastq.py
@@ -1,7 +1,10 @@
+from __future__ import absolute_import, unicode_literals
 import screed
 from screed.DBConstants import fileExtension
 import os
-from cStringIO import StringIO
+from io import StringIO
+from . import screed_tst_utils as utils
+import shutil
 
 
 def test_new_record():
@@ -27,13 +30,6 @@ def test_parse_description_true():
     assert records[0]['name'] == '1'
     assert records[1]['name'] == '2'
 
-    # also is default behavior
-    s = StringIO("@1 FOO\nACTG\n+\nAAAA\n@2\nACGG\n+\nAAAA\n")
-
-    records = list(iter(screed.fastq.fastq_iter(s)))
-    assert records[0]['name'] == '1'
-    assert records[1]['name'] == '2'
-
 
 def test_parse_description_false():
     # test for a bug where the record dict was not reset after each
@@ -46,11 +42,20 @@ def test_parse_description_false():
     assert records[0]['name'] == '1 FOO'
     assert records[1]['name'] == '2'
 
+    # also is default behavior
+    s = StringIO("@1 FOO\nACTG\n+\nAAAA\n@2\nACGG\n+\nAAAA\n")
+
+    records = list(iter(screed.fastq.fastq_iter(s)))
+    assert records[0]['name'] == '1 FOO'
+    assert records[1]['name'] == '2'
+
 
 class Test_fastq(object):
 
     def setup(self):
-        self._testfq = os.path.join(os.path.dirname(__file__), 'test.fastq')
+        self._testfq = utils.get_temp_filename('test.fastq')
+        shutil.copy(utils.get_test_data('test.fastq'), self._testfq)
+
         screed.read_fastq_sequences(self._testfq)
         self.db = screed.ScreedDB(self._testfq)
 
@@ -105,7 +110,7 @@ class Test_fastq(object):
             entries.append(self.db[entry])
 
         ivalues = list(self.db.itervalues())
-        assert sorted(entries) == sorted(ivalues)
+        assert all(entry in ivalues for entry in entries)
 
     def test_iteri(self):
         for id, entry in self.db.iteritems():
@@ -149,3 +154,28 @@ def test_writer_2():
     w.consume(read_iter)
 
     assert fp.getvalue() == '@foo bar\nATCG\n+\n####\n'
+
+
+def test_fastq_slicing():
+    testfq = utils.get_temp_filename('test.fastq')
+    shutil.copy(utils.get_test_data('test.fastq'), testfq)
+
+    with screed.open(testfq) as sequences:
+        record = next(sequences)
+
+    trimmed = record[:10]
+    assert trimmed['sequence'] == "ACAGCAAAAT"
+    assert trimmed['quality'] == "AA7AAA3+AA"
+
+    for s in (slice(5, 10), slice(2, 26), slice(5, -1, 2),
+              slice(-2, -10, 1), slice(-1, 5, 2), slice(5)):
+        trimmed = record[s]
+
+        assert trimmed['name'] == record['name']
+        assert trimmed.name == record.name
+
+        assert trimmed['sequence'] == record['sequence'][s]
+        assert trimmed.sequence == record.sequence[s]
+
+        assert trimmed['quality'] == record['quality'][s]
+        assert trimmed.quality == record.quality[s]
diff --git a/screed/tests/test_fastq_recover.py b/screed/tests/test_fastq_recover.py
index 59bd1c9..5bf7dcf 100644
--- a/screed/tests/test_fastq_recover.py
+++ b/screed/tests/test_fastq_recover.py
@@ -1,15 +1,20 @@
-import test_fastq
+from __future__ import absolute_import
+from . import test_fastq
 import os
 import screed
 from screed.DBConstants import fileExtension
+from . import screed_tst_utils as utils
+import shutil
 
 
 class test_fq_recover(test_fastq.Test_fastq):
 
     def setup(self):
-        thisdir = os.path.dirname(__file__)
-        self._fileName = os.path.join(thisdir, 'fastqRecovery')
-        self._testfq = os.path.join(thisdir, 'test.fastq')
+        self._fileName = utils.get_temp_filename('fastqRecovery')
+
+        self._testfq = utils.get_temp_filename('test.fastq')
+        shutil.copy(utils.get_test_data('test.fastq'), self._testfq)
+
         screed.read_fastq_sequences(self._testfq)
         screed.ToFastq(self._testfq, self._fileName)
         screed.read_fastq_sequences(self._fileName)
diff --git a/screed/tests/test_hava_methods.py b/screed/tests/test_hava_methods.py
index fba7859..e803a50 100644
--- a/screed/tests/test_hava_methods.py
+++ b/screed/tests/test_hava_methods.py
@@ -1,9 +1,13 @@
+from __future__ import absolute_import
 import screed
 import screed.seqparse
 from screed.DBConstants import fileExtension
 import os
+from . import screed_tst_utils as utils
+import shutil
 
-testha = os.path.join(os.path.dirname(__file__), 'test.hava')
+testha = utils.get_temp_filename('test.hava')
+shutil.copy(utils.get_test_data('test.hava'), testha)
 
 
 class test_hava(object):
diff --git a/screed/tests/test_open.py b/screed/tests/test_open.py
index b9be0be..548b370 100644
--- a/screed/tests/test_open.py
+++ b/screed/tests/test_open.py
@@ -1,27 +1,57 @@
+# Copyright (c) 2008-2015, Michigan State University
+
+from __future__ import absolute_import
+
 import os.path
 import sys
+import subprocess
 
+from . import screed_tst_utils as utils
 import screed
 import screed.openscreed
 
 
 def test_empty_open():
-    filename = os.path.join(os.path.dirname(__file__), 'empty.fa')
-    assert len(list(iter(screed.open(filename)))) == 0
+    filename = utils.get_test_data('empty.fa')
+    assert len(list(screed.open(filename))) == 0
+
+
+def test_open_maps_dash():
+    """Test mapping of '-'."""
+    #  pylint: disable=protected-access
+    filename = '-'
+    mapped = screed.openscreed._normalize_filename(filename)
+
+    assert '/dev/stdin' == mapped
+
+
+def test_open_stdin():
+    """Test feeding data through stdin.
+
+    Uses a subprocess with the data file directly used as stdin."""
+    filename1 = utils.get_test_data('test.fa')
+    command = ["python", "-c", "from __future__ import print_function;"
+               "import screed; print(list(screed.open('-')))"]
+    with open(filename1, 'rb') as data_file:
+        output = subprocess.Popen(command,
+                                  stdin=data_file, universal_newlines=True,
+                                  stdout=subprocess.PIPE).communicate()[0]
+        assert "'name': 'ENSMICT00000012722'" \
+            or "'name': u'ENSMICT00000012722'" in output, output
 
 
 def test_simple_open():
-    filename = os.path.join(os.path.dirname(__file__), 'test.fa')
+    filename = utils.get_test_data('test.fa')
 
     n = -1
-    for n, record in enumerate(screed.open(filename)):
+    for n, record in enumerate(screed.open(filename, parse_description=True)):
         assert record.name == 'ENSMICT00000012722'
         break
     assert n == 0, n
 
 
 def test_simple_open_fq():
-    filename = os.path.join(os.path.dirname(__file__), 'test.fastq')
+    filename = utils.get_test_data('test.fastq')
 
     n = -1
     for n, record in enumerate(screed.open(filename)):
@@ -31,8 +61,8 @@ def test_simple_open_fq():
 
 
 def test_gz_open():
-    filename1 = os.path.join(os.path.dirname(__file__), 'test.fa')
-    filename2 = os.path.join(os.path.dirname(__file__), 'test.fa.gz')
+    filename1 = utils.get_test_data('test.fa')
+    filename2 = utils.get_test_data('test.fa.gz')
     for n, (r1, r2) in enumerate(zip(screed.open(filename1),
                                      screed.open(filename2))):
         assert r1.name == r2.name
@@ -41,8 +71,8 @@ def test_gz_open():
 
 
 def test_bz2_open():
-    filename1 = os.path.join(os.path.dirname(__file__), 'test.fa')
-    filename2 = os.path.join(os.path.dirname(__file__), 'test.fa.bz2')
+    filename1 = utils.get_test_data('test.fa')
+    filename2 = utils.get_test_data('test.fa.bz2')
     for n, (r1, r2) in enumerate(zip(screed.open(filename1),
                                      screed.open(filename2))):
         assert r1.name == r2.name
@@ -51,8 +81,8 @@ def test_bz2_open():
 
 
 def test_gz_open_fastq():
-    filename1 = os.path.join(os.path.dirname(__file__), 'test.fastq')
-    filename2 = os.path.join(os.path.dirname(__file__), 'test.fastq.gz')
+    filename1 = utils.get_test_data('test.fastq')
+    filename2 = utils.get_test_data('test.fastq.gz')
     for n, (r1, r2) in enumerate(zip(screed.open(filename1),
                                      screed.open(filename2))):
         assert r1.name == r2.name
@@ -63,7 +93,7 @@ def test_gz_open_fastq():
 def test_get_writer_class_fasta():
     import screed.fasta
 
-    filename = os.path.join(os.path.dirname(__file__), 'test.fa')
+    filename = utils.get_test_data('test.fa')
 
     read_iter = screed.open(filename)
     x = screed.openscreed.get_writer_class(read_iter)
diff --git a/screed/tests/test_open_cm.py b/screed/tests/test_open_cm.py
new file mode 100644
index 0000000..0009b0d
--- /dev/null
+++ b/screed/tests/test_open_cm.py
@@ -0,0 +1,96 @@
+# Copyright (c) 2008-2015, Michigan State University
+
+from . import screed_tst_utils as utils
+import screed
+import screed.openscreed
+
+
+def test_empty_open():
+    filename = utils.get_test_data('empty.fa')
+    with screed.open(filename) as f:
+        assert len(list(f)) == 0
+
+
+def test_simple_open():
+    filename = utils.get_test_data('test.fa')
+
+    n = -1
+    with screed.open(filename, parse_description=True) as f:
+        for n, record in enumerate(f):
+            assert record.name == 'ENSMICT00000012722'
+            break
+
+        assert n == 0, n
+
+
+def test_simple_close():
+    filename = utils.get_test_data('test.fa')
+
+    n = -1
+    f = screed.open(filename, parse_description=True)
+    for n, record in enumerate(f):
+        assert record.name == 'ENSMICT00000012722'
+        break
+
+    assert n == 0, n
+    f.close()
+
+
+def test_simple_open_fq():
+    filename = utils.get_test_data('test.fastq')
+
+    n = -1
+    with screed.open(filename) as f:
+        for n, record in enumerate(f):
+            assert record.name == 'HWI-EAS_4_PE-FC20GCB:2:1:492:573/2'
+            break
+
+        assert n == 0
+
+
+def test_gz_open():
+    filename1 = utils.get_test_data('test.fa')
+    filename2 = utils.get_test_data('test.fa.gz')
+    with screed.open(filename1) as f1, screed.open(filename2) as f2:
+        for n, (r1, r2) in enumerate(zip(f1, f2)):
+            assert r1.name == r2.name
+
+        assert n > 0
+
+
+def test_bz2_open():
+    filename1 = utils.get_test_data('test.fa')
+    filename2 = utils.get_test_data('test.fa.bz2')
+    with screed.open(filename1) as f1, screed.open(filename2) as f2:
+        for n, (r1, r2) in enumerate(zip(f1, f2)):
+            assert r1.name == r2.name
+
+        assert n > 0
+
+
+def test_gz_open_fastq():
+    filename1 = utils.get_test_data('test.fastq')
+    filename2 = utils.get_test_data('test.fastq.gz')
+    with screed.open(filename1) as f1, screed.open(filename2) as f2:
+        for n, (r1, r2) in enumerate(zip(f1, f2)):
+            assert r1.name == r2.name
+
+        assert n > 0
+
+
+def test_get_writer_class_fasta():
+    import screed.fasta
+
+    filename = utils.get_test_data('test.fa')
+
+    with screed.open(filename) as f:
+        x = screed.openscreed.get_writer_class(f)
+        assert x is screed.fasta.FASTA_Writer, x
+
+
+def test_unknown_fileformat():
+    try:
+        with screed.open(__file__):
+            pass
+    except ValueError as err:
+        assert "unknown file format" in str(err)
diff --git a/screed/tests/test_pygr_api.py b/screed/tests/test_pygr_api.py
index 8c46a43..0f37dd0 100644
--- a/screed/tests/test_pygr_api.py
+++ b/screed/tests/test_pygr_api.py
@@ -2,6 +2,8 @@
 Test the pygr API.
 """
 
+from __future__ import absolute_import, unicode_literals
+
 try:
     import pygr
 except ImportError:
@@ -12,7 +14,7 @@ import screed
 from screed.DBConstants import fileExtension
 from screed.pygr_api import ScreedSequenceDB, ScreedSequenceDB_ByIndex
 from pickle import dump, load
-from cStringIO import StringIO
+from io import StringIO
 import os
 
 testfa = os.path.join(os.path.dirname(__file__), 'test.fa')
diff --git a/screed/tests/test_shell.py b/screed/tests/test_shell.py
index 1c3aa78..c622dec 100644
--- a/screed/tests/test_shell.py
+++ b/screed/tests/test_shell.py
@@ -1,9 +1,12 @@
-import test_fasta
-import test_fastq
+from __future__ import absolute_import
+from . import test_fasta
+from . import test_fastq
 import os
 import subprocess
 import screed
 from screed.DBConstants import fileExtension
+from . import screed_tst_utils as utils
+import shutil
 
 
 class Test_fa_shell(test_fasta.Test_fasta):
@@ -15,7 +18,10 @@ class Test_fa_shell(test_fasta.Test_fasta):
 
     def setup(self):
         thisdir = os.path.dirname(__file__)
-        self._testfa = os.path.join(thisdir, 'test.fa')
+
+        self._testfa = utils.get_temp_filename('test.fa')
+        shutil.copy(utils.get_test_data('test.fa'), self._testfa)
+
         fadbm = os.path.join(thisdir, '..', 'fadbm.py')
         subprocess.check_call(['python', fadbm, self._testfa],
                               stdout=subprocess.PIPE)
@@ -34,7 +40,9 @@ class Test_fq_shell(test_fastq.Test_fastq):
 
     def setup(self):
         thisdir = os.path.dirname(__file__)
-        self._testfq = os.path.join(thisdir, 'test.fastq')
+
+        self._testfq = utils.get_temp_filename('test.fastq')
+        shutil.copy(utils.get_test_data('test.fastq'), self._testfq)
 
         fqdbm = os.path.join(thisdir, '..', 'fqdbm.py')
         subprocess.check_call(['python', fqdbm, self._testfq],
diff --git a/screed/tests/test_streaming.py b/screed/tests/test_streaming.py
index 30775b2..a3ae342 100644
--- a/screed/tests/test_streaming.py
+++ b/screed/tests/test_streaming.py
@@ -1,13 +1,21 @@
-import test_fasta
-import test_fastq
+# Copyright (c) 2008-2015, Michigan State University
+
+from __future__ import print_function
+from __future__ import absolute_import
+
 import tempfile
 import os
 import sys
 import io
 import threading
 import subprocess
-import screed
+
 from nose.plugins.attrib import attr
+
+import screed
+from . import screed_tst_utils as utils
+from . import test_fasta
+from . import test_fastq
 from screed.DBConstants import fileExtension
 
 
@@ -24,6 +32,7 @@ def streamer(ifilename):
     # Get temp filenames, etc.
     in_dir = tempfile.mkdtemp(prefix="screedtest_")
     fifo = os.path.join(in_dir, 'fifo')
+    ifile = io.open(ifilename, 'rb')
 
     # make a fifo to simulate streaming
     os.mkfifo(fifo)
@@ -35,7 +44,6 @@ def streamer(ifilename):
     thread = threading.Thread(target=streamer_reader, args=[fifo, exception])
     thread.start()
 
-    ifile = io.open(ifilename, 'rb')
     fifofile = io.open(fifo, 'wb')
     # read binary to handle compressed files
     chunk = ifile.read(8192)
@@ -52,34 +60,34 @@ def streamer(ifilename):
 
 
 def test_stream_fa():
-    streamer(os.path.join(os.path.dirname(__file__), 'test.fa'))
+    streamer(utils.get_test_data('test.fa'))
 
 
 def test_stream_fq():
-    streamer(os.path.join(os.path.dirname(__file__), 'test.fastq'))
+    streamer(utils.get_test_data('test.fastq'))
 
 
 @attr('known_failing')
 def test_stream_fa_gz():
-    streamer(os.path.join(os.path.dirname(__file__), 'test.fa.gz'))
+    streamer(utils.get_test_data('test.fa.gz'))
 
 
 def test_stream_gz_fail():
     try:
-        streamer(os.path.join(os.path.dirname(__file__), 'test.fastq.gz'))
+        streamer(utils.get_test_data('test.fastq.gz'))
         assert 0, "This should not work yet"
     except ValueError as err:
-        print str(err)
+        print(str(err))
 
 
 @attr('known_failing')
 def test_stream_fq_gz():
-    streamer(os.path.join(os.path.dirname(__file__), 'test.fastq.gz'))
+    streamer(utils.get_test_data('test.fastq.gz'))
 
 
 def test_stream_fa_bz2():
-    streamer(os.path.join(os.path.dirname(__file__), 'test.fa.bz2'))
+    streamer(utils.get_test_data('test.fa.bz2'))
 
 
 def test_stream_fq_bz2():
-    streamer(os.path.join(os.path.dirname(__file__), 'test.fastq.bz2'))
+    streamer(utils.get_test_data('test.fastq.bz2'))
diff --git a/screed/utils.py b/screed/utils.py
new file mode 100644
index 0000000..006145b
--- /dev/null
+++ b/screed/utils.py
@@ -0,0 +1,7 @@
+def to_str(line):
+    try:
+        line = line.decode('utf-8')
+    except AttributeError:
+        pass
+
+    return line
diff --git a/setup.py b/setup.py
index 6c963df..6fc12f4 100755
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,9 @@
 #!/usr/bin/env python
+from __future__ import print_function
 try:
     from setuptools import setup
 except ImportError:
-    print '(WARNING: importing distutils, not setuptools!)'
+    print('(WARNING: importing distutils, not setuptools!)')
     from distutils.core import setup
 
 import imp
@@ -21,7 +22,7 @@ setup(name='screed',
       description='A short read database',
       author='Alex Nolley, C. Titus Brown',
       author_email='ctb@msu.edu',
-      url='http://github.com/ged-lab/screed/',
+      url='http://github.com/dib-lab/screed/',
       include_package_data=True,
       packages=['screed', 'screed.tests'],
       package_data={
diff --git a/tox.ini b/tox.ini
index ca5a7d4..57ef524 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,12 +1,7 @@
 [tox]
-envlist = py27, py33
+envlist = py27, py33, py34
 
 [testenv]
-commands = nosetests --with-xcoverage --with-xunit --cover-package=screed --cover-erase
-deps =
-  nosexcover
-  pygr
-
-[testenv:py33]
+commands = nosetests --with-xcoverage --with-xunit --cover-package=screed --cover-erase --attr '!known_failing'
 deps =
   nosexcover
diff --git a/versioneer.py b/versioneer.py
index d3fe56c..c00770f 100644
--- a/versioneer.py
+++ b/versioneer.py
@@ -1,5 +1,5 @@
 
-# Version: 0.13
+# Version: 0.14
 
 """
 The Versioneer
@@ -10,8 +10,12 @@ The Versioneer
 * Brian Warner
 * License: Public Domain
 * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, and pypy
-* [![Latest Version](https://pypip.in/version/versioneer/badge.svg?style=flat)](https://pypi.python.org/pypi/versioneer/)
-* [![Build Status](https://travis-ci.org/warner/python-versioneer.png?branch=master)](https://travis-ci.org/warner/python-versioneer)
+* [![Latest Version]
+(https://pypip.in/version/versioneer/badge.svg?style=flat)
+](https://pypi.python.org/pypi/versioneer/)
+* [![Build Status]
+(https://travis-ci.org/warner/python-versioneer.png?branch=master)
+](https://travis-ci.org/warner/python-versioneer)
 
 This is a tool for managing a recorded version number in distutils-based
 python projects. The goal is to remove the tedious and error-prone "update
@@ -142,12 +146,14 @@ To versioneer-enable your project:
 * 2: add the following lines to the top of your `setup.py`, with the
   configuration values you decided earlier:
 
-        import versioneer
-        versioneer.VCS = 'git'
-        versioneer.versionfile_source = 'src/myproject/_version.py'
-        versioneer.versionfile_build = 'myproject/_version.py'
-        versioneer.tag_prefix = '' # tags are like 1.2.0
-        versioneer.parentdir_prefix = 'myproject-' # dirname like 'myproject-1.2.0'
+  ````
+  import versioneer
+  versioneer.VCS = 'git'
+  versioneer.versionfile_source = 'src/myproject/_version.py'
+  versioneer.versionfile_build = 'myproject/_version.py'
+  versioneer.tag_prefix = '' # tags are like 1.2.0
+  versioneer.parentdir_prefix = 'myproject-' # dirname like 'myproject-1.2.0'
+  ````
 
 * 3: add the following arguments to the setup() call in your setup.py:
 
@@ -195,17 +201,19 @@ import the top-level `versioneer.py` and run `get_versions()`.
 Both functions return a dictionary with different keys for different flavors
 of the version string:
 
-* `['version']`: condensed tag+distance+shortid+dirty identifier. For git,
-  this uses the output of `git describe --tags --dirty --always` but strips
-  the tag_prefix. For example "0.11-2-g1076c97-dirty" indicates that the tree
-  is like the "1076c97" commit but has uncommitted changes ("-dirty"), and
-  that this commit is two revisions ("-2-") beyond the "0.11" tag. For
-  released software (exactly equal to a known tag), the identifier will only
-  contain the stripped tag, e.g. "0.11".
+* `['version']`: A condensed PEP440-compliant string, equal to the
+  un-prefixed tag name for actual releases, and containing an additional
+  "local version" section with more detail for in-between builds. For Git,
+  this is TAG[+DISTANCE.gHEX[.dirty]], using information from `git describe
+  --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates
+  that the tree is like the "1076c97" commit but has uncommitted changes
+  (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11"
+  tag. For released software (exactly equal to a known tag), the identifier
+  will only contain the stripped tag, e.g. "0.11".
 
 * `['full']`: detailed revision identifier. For Git, this is the full SHA1
-  commit id, followed by "-dirty" if the tree contains uncommitted changes,
-  e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac-dirty".
+  commit id, followed by ".dirty" if the tree contains uncommitted changes,
+  e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac.dirty".
 
 Some variants are more useful than others. Including `full` in a bug report
 should allow developers to reconstruct the exact code being tested (or
@@ -214,13 +222,6 @@ developers). `version` is suitable for display in an "about" box or a CLI
 `--version` output: it can be easily compared against release notes and lists
 of bugs fixed in various releases.
 
-In the future, this will also include a
-[PEP-0440](http://legacy.python.org/dev/peps/pep-0440/) -compatible flavor
-(e.g. `1.2.post0.dev123`). This loses a lot of information (and has no room
-for a hash-based revision id), but is safe to use in a `setup.py`
-"`version=`" argument. It also enables tools like *pip* to compare version
-strings and evaluate compatibility constraint declarations.
-
 The `setup.py versioneer` command adds the following text to your
 `__init__.py` to place a basic version in `YOURPROJECT.__version__`:
 
@@ -250,6 +251,14 @@ systems (SVN, etc) in the future.
 
 Nothing special.
 
+## Upgrading to 0.14
+
+0.14 changes the format of the version string. 0.13 and earlier used
+hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a
+plus-separated "local version" section with dot-separated components,
+like "0.11+2.g1076c97". PEP440-strict tools did not like the old
+format, but should be ok with the new one.
+
 ## Future Directions
 
 This tool is designed to make it easily extended to other version-control
@@ -272,10 +281,14 @@ domain.
 
 """
 
-import os, sys, re, subprocess, errno
-from distutils.core import Command
-from distutils.command.sdist import sdist as _sdist
+import errno
+import os
+import re
+import subprocess
+import sys
 from distutils.command.build import build as _build
+from distutils.command.sdist import sdist as _sdist
+from distutils.core import Command
 
 # these configuration settings will be overridden by setup.py after it
 # imports us
@@ -312,14 +325,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
             print("unable to find command, tried %s" % (commands,))
         return None
     stdout = p.communicate()[0].strip()
-    if sys.version >= '3':
+    if sys.version_info[0] >= 3:
         stdout = stdout.decode()
     if p.returncode != 0:
         if verbose:
             print("unable to run %s (error)" % args[0])
         return None
     return stdout
-
 LONG_VERSION_PY['git'] = '''
 # This file helps to compute a version number in source trees obtained from
 # git-archive tarball (such as those provided by githubs download-from-tag
@@ -328,7 +340,13 @@ LONG_VERSION_PY['git'] = '''
 # that just contains the computed version number.
 
 # This file is released into the public domain. Generated by
-# versioneer-0.13 (https://github.com/warner/python-versioneer)
+# versioneer-0.14 (https://github.com/warner/python-versioneer)
+
+import errno
+import os
+import re
+import subprocess
+import sys
 
 # these strings will be replaced by git during git-archive
 git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s"
@@ -339,12 +357,6 @@ tag_prefix = "%(TAG_PREFIX)s"
 parentdir_prefix = "%(PARENTDIR_PREFIX)s"
 versionfile_source = "%(VERSIONFILE_SOURCE)s"
 
-import errno
-import os
-import re
-import subprocess
-import sys
-
 
 def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
     assert isinstance(commands, list)
@@ -369,7 +381,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
             print("unable to find command, tried %%s" %% (commands,))
         return None
     stdout = p.communicate()[0].strip()
-    if sys.version >= '3':
+    if sys.version_info[0] >= 3:
         stdout = stdout.decode()
     if p.returncode != 0:
         if verbose:
@@ -447,13 +459,59 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
                 print("picking %%s" %% r)
             return {"version": r,
                     "full": keywords["full"].strip()}
-    # no suitable tags, so we use the full revision id
+    # no suitable tags, so version is "0+unknown", but full hex is still there
     if verbose:
-        print("no suitable tags, using full revision id")
-    return {"version": keywords["full"].strip(),
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
             "full": keywords["full"].strip()}
 
 
+def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
+    # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens.
+
+    # dirty
+    dirty = git_describe.endswith("-dirty")
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+    dirty_suffix = ".dirty" if dirty else ""
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" not in git_describe:  # just HEX
+        return "0+untagged.g"+git_describe+dirty_suffix, dirty
+
+    # just TAG-NUM-gHEX
+    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+    if not mo:
+        # unparseable. Maybe git-describe is misbehaving?
+        return "0+unparseable"+dirty_suffix, dirty
+
+    # tag
+    full_tag = mo.group(1)
+    if not full_tag.startswith(tag_prefix):
+        if verbose:
+            fmt = "tag '%%s' doesn't start with prefix '%%s'"
+            print(fmt %% (full_tag, tag_prefix))
+        return None, dirty
+    tag = full_tag[len(tag_prefix):]
+
+    # distance: number of commits since tag
+    distance = int(mo.group(2))
+
+    # commit: short hex revision ID
+    commit = mo.group(3)
+
+    # now build up version string, with post-release "local version
+    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
+    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
+    # can always test version.endswith(".dirty").
+    version = tag
+    if distance or dirty:
+        version += "+%%d.g%%s" %% (distance, commit) + dirty_suffix
+
+    return version, dirty
+
+
 def git_versions_from_vcs(tag_prefix, root, verbose=False):
     # this runs 'git' from the root of the source tree. This only gets called
     # if the git-archive 'subst' keywords were *not* expanded, and
@@ -463,31 +521,33 @@ def git_versions_from_vcs(tag_prefix, root, verbose=False):
     if not os.path.exists(os.path.join(root, ".git")):
         if verbose:
             print("no .git in %%s" %% root)
-        return {}
+        return {}  # get_versions() will try next method
 
     GITS = ["git"]
     if sys.platform == "win32":
         GITS = ["git.cmd", "git.exe"]
-    stdout = run_command(GITS, ["describe", "--tags", "--dirty", "--always"],
+    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
+    # if there are no tags, this yields HEX[-dirty] (no NUM)
+    stdout = run_command(GITS, ["describe", "--tags", "--dirty",
+                                "--always", "--long"],
                          cwd=root)
+    # --long was added in git-1.5.5
     if stdout is None:
-        return {}
-    if not stdout.startswith(tag_prefix):
-        if verbose:
-            fmt = "tag '%%s' doesn't start with prefix '%%s'"
-            print(fmt %% (stdout, tag_prefix))
-        return {}
-    tag = stdout[len(tag_prefix):]
+        return {}  # try next method
+    version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose)
+
+    # build "full", which is FULLHEX[.dirty]
     stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
     if stdout is None:
         return {}
     full = stdout.strip()
-    if tag.endswith("-dirty"):
-        full += "-dirty"
-    return {"version": tag, "full": full}
+    if dirty:
+        full += ".dirty"
+
+    return {"version": version, "full": full}
 
 
-def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
+def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False):
     # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
     # __file__, we can work backwards from there to the root. Some
     # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
@@ -503,7 +563,7 @@ def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
         # versionfile_source is the relative path from the top of the source
         # tree (where the .git directory might live) to this file. Invert
         # this to find the root from __file__.
-        for i in range(len(versionfile_source.split('/'))):
+        for i in versionfile_source.split('/'):
             root = os.path.dirname(root)
     except NameError:
         return default
@@ -571,13 +631,59 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
                 print("picking %s" % r)
             return {"version": r,
                     "full": keywords["full"].strip()}
-    # no suitable tags, so we use the full revision id
+    # no suitable tags, so version is "0+unknown", but full hex is still there
     if verbose:
-        print("no suitable tags, using full revision id")
-    return {"version": keywords["full"].strip(),
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
             "full": keywords["full"].strip()}
 
 
+def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
+    # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens.
+
+    # dirty
+    dirty = git_describe.endswith("-dirty")
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+    dirty_suffix = ".dirty" if dirty else ""
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" not in git_describe:  # just HEX
+        return "0+untagged.g"+git_describe+dirty_suffix, dirty
+
+    # just TAG-NUM-gHEX
+    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+    if not mo:
+        # unparseable. Maybe git-describe is misbehaving?
+        return "0+unparseable"+dirty_suffix, dirty
+
+    # tag
+    full_tag = mo.group(1)
+    if not full_tag.startswith(tag_prefix):
+        if verbose:
+            fmt = "tag '%s' doesn't start with prefix '%s'"
+            print(fmt % (full_tag, tag_prefix))
+        return None, dirty
+    tag = full_tag[len(tag_prefix):]
+
+    # distance: number of commits since tag
+    distance = int(mo.group(2))
+
+    # commit: short hex revision ID
+    commit = mo.group(3)
+
+    # now build up version string, with post-release "local version
+    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
+    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
+    # can always test version.endswith(".dirty").
+    version = tag
+    if distance or dirty:
+        version += "+%d.g%s" % (distance, commit) + dirty_suffix
+
+    return version, dirty
+
+
 def git_versions_from_vcs(tag_prefix, root, verbose=False):
     # this runs 'git' from the root of the source tree. This only gets called
     # if the git-archive 'subst' keywords were *not* expanded, and
@@ -587,28 +693,30 @@ def git_versions_from_vcs(tag_prefix, root, verbose=False):
     if not os.path.exists(os.path.join(root, ".git")):
         if verbose:
             print("no .git in %s" % root)
-        return {}
+        return {}  # get_versions() will try next method
 
     GITS = ["git"]
     if sys.platform == "win32":
         GITS = ["git.cmd", "git.exe"]
-    stdout = run_command(GITS, ["describe", "--tags", "--dirty", "--always"],
+    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
+    # if there are no tags, this yields HEX[-dirty] (no NUM)
+    stdout = run_command(GITS, ["describe", "--tags", "--dirty",
+                                "--always", "--long"],
                          cwd=root)
+    # --long was added in git-1.5.5
     if stdout is None:
-        return {}
-    if not stdout.startswith(tag_prefix):
-        if verbose:
-            fmt = "tag '%s' doesn't start with prefix '%s'"
-            print(fmt % (stdout, tag_prefix))
-        return {}
-    tag = stdout[len(tag_prefix):]
+        return {}  # try next method
+    version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose)
+
+    # build "full", which is FULLHEX[.dirty]
     stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
     if stdout is None:
         return {}
     full = stdout.strip()
-    if tag.endswith("-dirty"):
-        full += "-dirty"
-    return {"version": tag, "full": full}
+    if dirty:
+        full += ".dirty"
+
+    return {"version": version, "full": full}
 
 
 def do_vcs_install(manifest_in, versionfile_source, ipy):
@@ -643,6 +751,7 @@ def do_vcs_install(manifest_in, versionfile_source, ipy):
         files.append(".gitattributes")
     run_command(GITS, ["add", "--"] + files)
 
+
 def versions_from_parentdir(parentdir_prefix, root, verbose=False):
     # Source tarballs conventionally unpack into a directory that includes
     # both the project name and a version string.
@@ -655,7 +764,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose=False):
     return {"version": dirname[len(parentdir_prefix):], "full": ""}
 
 SHORT_VERSION_PY = """
-# This file was generated by 'versioneer.py' (0.13) from
+# This file was generated by 'versioneer.py' (0.14) from
 # revision-control system data, or from the parent directory name of an
 # unpacked source archive. Distribution tarballs contain a pre-generated copy
 # of this file.
@@ -667,7 +776,8 @@ def get_versions(default={}, verbose=False):
 
 """
 
-DEFAULT = {"version": "unknown", "full": "unknown"}
+DEFAULT = {"version": "0+unknown", "full": "unknown"}
+
 
 def versions_from_file(filename):
     versions = {}
@@ -685,6 +795,7 @@ def versions_from_file(filename):
 
     return versions
 
+
 def write_to_version_file(filename, versions):
     with open(filename, "w") as f:
         f.write(SHORT_VERSION_PY % versions)
@@ -698,14 +809,18 @@ def get_root():
     except NameError:
         return os.path.dirname(os.path.abspath(sys.argv[0]))
 
+
 def vcs_function(vcs, suffix):
     return getattr(sys.modules[__name__], '%s_%s' % (vcs, suffix), None)
 
+
 def get_versions(default=DEFAULT, verbose=False):
     # returns dict with two keys: 'version' and 'full'
-    assert versionfile_source is not None, "please set versioneer.versionfile_source"
+    assert versionfile_source is not None, \
+        "please set versioneer.versionfile_source"
     assert tag_prefix is not None, "please set versioneer.tag_prefix"
-    assert parentdir_prefix is not None, "please set versioneer.parentdir_prefix"
+    assert parentdir_prefix is not None, \
+        "please set versioneer.parentdir_prefix"
     assert VCS is not None, "please set versioneer.VCS"
 
     # I am in versioneer.py, which must live at the top of the source tree,
@@ -728,40 +843,50 @@ def get_versions(default=DEFAULT, verbose=False):
         vcs_keywords = get_keywords_f(versionfile_abs)
         ver = versions_from_keywords_f(vcs_keywords, tag_prefix)
         if ver:
-            if verbose: print("got version from expanded keyword %s" % ver)
+            if verbose:
+                print("got version from expanded keyword %s" % ver)
             return ver
 
     ver = versions_from_file(versionfile_abs)
     if ver:
-        if verbose: print("got version from file %s %s" % (versionfile_abs,ver))
+        if verbose:
+            print("got version from file %s %s" % (versionfile_abs, ver))
         return ver
 
     versions_from_vcs_f = vcs_function(VCS, "versions_from_vcs")
     if versions_from_vcs_f:
         ver = versions_from_vcs_f(tag_prefix, root, verbose)
         if ver:
-            if verbose: print("got version from VCS %s" % ver)
+            if verbose:
+                print("got version from VCS %s" % ver)
             return ver
 
     ver = versions_from_parentdir(parentdir_prefix, root, verbose)
     if ver:
-        if verbose: print("got version from parentdir %s" % ver)
+        if verbose:
+            print("got version from parentdir %s" % ver)
         return ver
 
-    if verbose: print("got version from default %s" % default)
+    if verbose:
+        print("got version from default %s" % default)
     return default
 
+
 def get_version(verbose=False):
     return get_versions(verbose=verbose)["version"]
 
+
 class cmd_version(Command):
     description = "report generated version string"
     user_options = []
     boolean_options = []
+
     def initialize_options(self):
         pass
+
     def finalize_options(self):
         pass
+
     def run(self):
         ver = get_version(verbose=True)
         print("Version is currently: %s" % ver)
@@ -774,7 +899,8 @@ class cmd_build(_build):
         # now locate _version.py in the new build/ directory and replace it
         # with an updated value
         if versionfile_build:
-            target_versionfile = os.path.join(self.build_lib, versionfile_build)
+            target_versionfile = os.path.join(self.build_lib,
+                                              versionfile_build)
             print("UPDATING %s" % target_versionfile)
             os.unlink(target_versionfile)
             with open(target_versionfile, "w") as f:
@@ -803,6 +929,7 @@ if 'cx_Freeze' in sys.modules:  # cx_freeze enabled?
                                 "VERSIONFILE_SOURCE": versionfile_source,
                                 })
 
+
 class cmd_sdist(_sdist):
     def run(self):
         versions = get_versions(verbose=True)
@@ -827,14 +954,19 @@ __version__ = get_versions()['version']
 del get_versions
 """
 
+
 class cmd_update_files(Command):
-    description = "install/upgrade Versioneer files: __init__.py SRC/_version.py"
+    description = ("install/upgrade Versioneer files: "
+                   "__init__.py SRC/_version.py")
     user_options = []
     boolean_options = []
+
     def initialize_options(self):
         pass
+
     def finalize_options(self):
         pass
+
     def run(self):
         print(" creating %s" % versionfile_source)
         with open(versionfile_source, "w") as f:
@@ -900,6 +1032,7 @@ class cmd_update_files(Command):
         # substitution.
         do_vcs_install(manifest_in, versionfile_source, ipy)
 
+
 def get_cmdclass():
     cmds = {'version': cmd_version,
             'versioneer': cmd_update_files,

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-screed.git