[med-svn] [poretools] 01/01: Imported Upstream version 0.6.0+dfsg

Afif Elghraoui afif at moszumanska.debian.org
Mon Sep 5 00:32:21 UTC 2016


This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch upstream
in repository poretools.

commit 6e8fab5a1e1c7d94b8fb61cde3730ac40194e129
Author: Afif Elghraoui <afif at debian.org>
Date:   Sun Sep 4 16:25:13 2016 -0700

    Imported Upstream version 0.6.0+dfsg
---
 .gitignore                                         |   4 +
 Dockerfile                                         |  40 --
 MANIFEST.in                                        |   2 +-
 README.md                                          |  23 +-
 appveyor.yml                                       | 145 ++++++
 appveyor/install.ps1                               | 229 ++++++++++
 appveyor/run_with_env.cmd                          |  88 ++++
 dist/poretools-0.3.0.win-amd64.exe                 | Bin 284988 -> 0 bytes
 dist/poretools-0.3.1.win-amd64.exe                 | Bin 285930 -> 0 bytes
 dist/poretools-0.5.0.win-amd64.exe                 | Bin 288186 -> 0 bytes
 dist/poretools.reg                                 |   8 -
 docs/conf.py                                       |   2 +-
 docs/content/_images/qualpos.png                   | Bin 0 -> 24494 bytes
 docs/content/examples.rst                          | 193 +++++---
 docs/content/history.rst                           |  11 +
 docs/content/installation.rst                      |  83 +---
 docs/index.rst                                     |   8 +-
 docs/requirements.txt                              |   1 +
 poretools/Event.py                                 |  70 ++-
 poretools/Fast5File.py                             | 287 +++++++++++-
 poretools/Fast5File_pytables.py                    | 504 ---------------------
 poretools/events.py                                |  16 +-
 poretools/fasta.py                                 |   8 +-
 poretools/fastq.py                                 |  23 +-
 poretools/formats.py                               |  14 +
 poretools/hist.py                                  |  93 ++--
 poretools/index.py                                 |  66 +++
 poretools/metadata.py                              |  22 +
 poretools/occupancy.py                             | 229 +++-------
 poretools/organise.py                              |  39 ++
 poretools/poretools_main.py                        | 174 ++++++-
 poretools/qual_v_pos.py                            |  66 +++
 poretools/squiggle.py                              | 154 +++----
 poretools/times.py                                 |   2 +-
 poretools/version.py                               |   2 +-
 poretools/windows.py                               |  19 -
 poretools/yield_plot.py                            | 200 ++++----
 requirements.txt                                   |   6 +-
 setup.py                                           |  61 +--
 .../2016_3_4_3507_1_ch120_read240_strand.fast5     | Bin 0 -> 1570393 bytes
 .../2016_3_4_3507_1_ch120_read353_strand.fast5     | Bin 0 -> 1238287 bytes
 .../2016_3_4_3507_1_ch120_read415_strand.fast5     | Bin 0 -> 1128165 bytes
 .../2016_3_4_3507_1_ch120_read418_strand.fast5     | Bin 0 -> 1289709 bytes
 .../2016_3_4_3507_1_ch120_read433_strand.fast5     | Bin 0 -> 2005087 bytes
 .../2016_3_4_3507_1_ch120_read443_strand.fast5     | Bin 0 -> 887162 bytes
 .../2016_3_4_3507_1_ch120_read505_strand.fast5     | Bin 0 -> 1034554 bytes
 .../2016_3_4_3507_1_ch120_read521_strand.fast5     | Bin 0 -> 1262508 bytes
 .../2016_3_4_3507_1_ch120_read542_strand.fast5     | Bin 0 -> 2053955 bytes
 .../2016_3_4_3507_1_ch120_read586_strand.fast5     | Bin 0 -> 1370058 bytes
 .../2016_3_4_3507_1_ch120_read635_strand.fast5     | Bin 0 -> 1415531 bytes
 .../2016_3_4_3507_1_ch120_read706_strand.fast5     | Bin 0 -> 1342813 bytes
 .../2016_3_4_3507_1_ch120_read83_strand.fast5      | Bin 0 -> 1583898 bytes
 .../2016_3_4_3507_1_ch120_read89_strand.fast5      | Bin 0 -> 978140 bytes
 .../2016_3_4_3507_1_ch126_read1066_strand.fast5    | Bin 0 -> 1349302 bytes
 .../2016_3_4_3507_1_ch126_read1079_strand.fast5    | Bin 0 -> 1879525 bytes
 .../2016_3_4_3507_1_ch126_read1169_strand.fast5    | Bin 0 -> 1119763 bytes
 .../2016_3_4_3507_1_ch126_read1250_strand.fast5    | Bin 0 -> 1167710 bytes
 .../2016_3_4_3507_1_ch126_read1377_strand.fast5    | Bin 0 -> 828114 bytes
 .../2016_3_4_3507_1_ch126_read1387_strand.fast5    | Bin 0 -> 1031775 bytes
 .../2016_3_4_3507_1_ch126_read160_strand.fast5     | Bin 0 -> 952615 bytes
 .../2016_3_4_3507_1_ch126_read217_strand.fast5     | Bin 0 -> 1920500 bytes
 .../2016_3_4_3507_1_ch126_read223_strand.fast5     | Bin 0 -> 1116589 bytes
 .../2016_3_4_3507_1_ch126_read249_strand.fast5     | Bin 0 -> 1402714 bytes
 .../2016_3_4_3507_1_ch126_read324_strand.fast5     | Bin 0 -> 1498465 bytes
 .../2016_3_4_3507_1_ch126_read326_strand.fast5     | Bin 0 -> 852262 bytes
 .../2016_3_4_3507_1_ch126_read382_strand.fast5     | Bin 0 -> 1031446 bytes
 .../2016_3_4_3507_1_ch126_read42_strand.fast5      | Bin 0 -> 1858993 bytes
 .../2016_3_4_3507_1_ch126_read501_strand.fast5     | Bin 0 -> 1870086 bytes
 .../2016_3_4_3507_1_ch126_read562_strand.fast5     | Bin 0 -> 2290355 bytes
 .../2016_3_4_3507_1_ch126_read601_strand.fast5     | Bin 0 -> 1096349 bytes
 .../2016_3_4_3507_1_ch126_read618_strand.fast5     | Bin 0 -> 1225540 bytes
 .../2016_3_4_3507_1_ch126_read700_strand.fast5     | Bin 0 -> 1332148 bytes
 .../2016_3_4_3507_1_ch126_read743_strand.fast5     | Bin 0 -> 1688147 bytes
 .../2016_3_4_3507_1_ch126_read831_strand.fast5     | Bin 0 -> 1437489 bytes
 .../2016_3_4_3507_1_ch126_read833_strand.fast5     | Bin 0 -> 1577416 bytes
 .../2016_3_4_3507_1_ch126_read843_strand.fast5     | Bin 0 -> 1575973 bytes
 .../2016_3_4_3507_1_ch126_read857_strand.fast5     | Bin 0 -> 1320571 bytes
 .../2016_3_4_3507_1_ch126_read899_strand.fast5     | Bin 0 -> 1070267 bytes
 .../2016_3_4_3507_1_ch126_read914_strand.fast5     | Bin 0 -> 956454 bytes
 .../2016_3_4_3507_1_ch126_read940_strand.fast5     | Bin 0 -> 1030131 bytes
 .../2016_3_4_3507_1_ch126_read969_strand.fast5     | Bin 0 -> 1039441 bytes
 .../2016_3_4_3507_1_ch128_read204_strand.fast5     | Bin 0 -> 1061048 bytes
 .../2016_3_4_3507_1_ch128_read270_strand.fast5     | Bin 0 -> 1469918 bytes
 .../2016_3_4_3507_1_ch128_read361_strand.fast5     | Bin 0 -> 1684091 bytes
 .../2016_3_4_3507_1_ch128_read365_strand.fast5     | Bin 0 -> 1530857 bytes
 .../2016_3_4_3507_1_ch128_read376_strand.fast5     | Bin 0 -> 1902400 bytes
 .../2016_3_4_3507_1_ch128_read384_strand.fast5     | Bin 0 -> 1183780 bytes
 .../2016_3_4_3507_1_ch128_read404_strand.fast5     | Bin 0 -> 1709831 bytes
 .../2016_3_4_3507_1_ch128_read422_strand.fast5     | Bin 0 -> 1177979 bytes
 .../2016_3_4_3507_1_ch128_read430_strand.fast5     | Bin 0 -> 811659 bytes
 .../2016_3_4_3507_1_ch128_read503_strand.fast5     | Bin 0 -> 1605461 bytes
 .../2016_3_4_3507_1_ch128_read521_strand.fast5     | Bin 0 -> 1509399 bytes
 .../2016_3_4_3507_1_ch128_read635_strand.fast5     | Bin 0 -> 1608095 bytes
 .../2016_3_4_3507_1_ch128_read647_strand.fast5     | Bin 0 -> 1040252 bytes
 .../2016_3_4_3507_1_ch128_read723_strand.fast5     | Bin 0 -> 1540447 bytes
 .../2016_3_4_3507_1_ch128_read753_strand.fast5     | Bin 0 -> 1366371 bytes
 .../2016_3_4_3507_1_ch128_read763_strand.fast5     | Bin 0 -> 1192962 bytes
 .../2016_3_4_3507_1_ch128_read783_strand.fast5     | Bin 0 -> 1556061 bytes
 .../2016_3_4_3507_1_ch128_read790_strand.fast5     | Bin 0 -> 1625467 bytes
 .../2016_3_4_3507_1_ch128_read95_strand.fast5      | Bin 0 -> 781306 bytes
 .../2016_3_4_3507_1_ch13_read1130_strand.fast5     | Bin 0 -> 1472869 bytes
 .../2016_3_4_3507_1_ch13_read1132_strand.fast5     | Bin 0 -> 1730815 bytes
 .../2016_3_4_3507_1_ch13_read1150_strand.fast5     | Bin 0 -> 1361446 bytes
 .../2016_3_4_3507_1_ch13_read1404_strand.fast5     | Bin 0 -> 1426971 bytes
 .../2016_3_4_3507_1_ch13_read1414_strand.fast5     | Bin 0 -> 1135261 bytes
 .../2016_3_4_3507_1_ch13_read1456_strand.fast5     | Bin 0 -> 1440622 bytes
 .../2016_3_4_3507_1_ch13_read1474_strand.fast5     | Bin 0 -> 1305443 bytes
 ...K12_1D_R9_SpotON_41280_ch52_read58_strand.fast5 | Bin 0 -> 1914989 bytes
 108 files changed, 1664 insertions(+), 1228 deletions(-)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a204982
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*.pyc
+build/
+dist/
+*.egg-info
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index 6f9ebe0..0000000
--- a/Dockerfile
+++ /dev/null
@@ -1,40 +0,0 @@
-###############################################
-# Dockerfile to build poretools container image
-# Based on Ubuntu 14.04
-# Build with:
-#   sudo docker build -t poretools .
-###############################################
-
-# Use ubuntu 14.04 base image
-FROM ubuntu:14.04
-
-# set non-interactive mode
-ENV DEBIAN_FRONTEND noninteractive
-
-############# BEGIN INSTALLATION ##############
-
-# Prepare to install R
-RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E084DAB9
-RUN echo 'deb http://cran.rstudio.com/bin/linux/ubuntu trusty/' >> /etc/apt/sources.list
-RUN apt-get update
-
-# Install dependencies
-RUN apt-get -y install git python-tables python-setuptools python-pip python-dev cython libhdf5-serial-dev r-base python-rpy2
-
-# Upgrade numexpr
-RUN pip install numexpr --upgrade
-
-# Install R packages
-RUN Rscript -e 'options("repos" = c(CRAN = "http://cran.rstudio.com/")); install.packages("codetools"); install.packages("MASS"); install.packages("ggplot2")'
-
-# Install poretools
-RUN git clone https://github.com/arq5x/poretools /tmp/poretools
-RUN cd /tmp/poretools && python setup.py install
-
-############## INSTALLATION END ##############
-
-# Set entrypoint so container can be used as executable
-ENTRYPOINT ["poretools"]
-
-# File author/maintainer info
-MAINTAINER Stephen Turner <lastname at virginia dot edu>
diff --git a/MANIFEST.in b/MANIFEST.in
index d54bfb5..540b720 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1 +1 @@
-requirements.txt
\ No newline at end of file
+include requirements.txt
\ No newline at end of file
diff --git a/README.md b/README.md
index 4bcd327..a432fe4 100644
--- a/README.md
+++ b/README.md
@@ -2,22 +2,19 @@
 
 *Nick Loman and Aaron Quinlan*
 
-**Note:** this software is in an alpha state; the code is changing rapidly and the API and CLI may change at any time.
-
 Complete installation instructions and usage examples can be found on the [poretools documentation site](http://poretools.readthedocs.org).
 
 Requirements
 ===================
 - HDF5 >= 1.8.7 (http://www.hdfgroup.org/HDF5/)
-- R >= 3.0.0
 - Python >= 2.7
-- rpy2 >= 2.4.2
-- h5py >= 2.0
-
-
-
-
-
-
-[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/arq5x/poretools/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
-
+- h5py >= 2.2
+- matplotlib
+- seaborn
+- pandas
+
+Contributors
+============
+@arq5x
+@nickloman
+@brentp
diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 0000000..99cf349
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,145 @@
+environment:
+  HDF5_LIBDIR: C:\HDF5
+
+  global:
+    # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
+    # /E:ON and /V:ON options are not enabled in the batch script interpreter
+    # See: http://stackoverflow.com/a/13751649/163740
+    CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd"
+
+  matrix:
+
+    - PYTHON: "C:\\Python27-x64"
+      PYTHON_VERSION: "2.7.x" # currently 2.7.9
+      PYTHON_ARCH: "64"
+
+
+    # Python 2.7.10 is the latest version and is not pre-installed.
+    - PYTHON: "C:\\Python27.10-x64"
+      PYTHON_VERSION: "2.7.10"
+      PYTHON_ARCH: "64"
+
+
+    - PYTHON: "C:\\Python27.10"
+      PYTHON_VERSION: "2.7.10"
+      PYTHON_ARCH: "32"
+
+    # Pre-installed Python versions, which Appveyor may upgrade to
+    # a later point release.
+    # See: http://www.appveyor.com/docs/installed-software#python
+    - PYTHON: "C:\\Python27"
+      PYTHON_VERSION: "2.7.x" # currently 2.7.9
+      PYTHON_ARCH: "32"
+
+    # Major and minor releases (i.e x.0.0 and x.y.0) prior to 3.3.0 use
+    # a different naming scheme.
+
+    #- PYTHON: "C:\\Python270"
+    #  PYTHON_VERSION: "2.7.0"
+    #  PYTHON_ARCH: "32"
+    #
+    #- PYTHON: "C:\\Python270-x64"
+    #  PYTHON_VERSION: "2.7.0"
+    #  PYTHON_ARCH: "64"
+
+install:
+  # If there is a newer build queued for the same PR, cancel this one.
+  # The AppVeyor 'rollout builds' option is supposed to serve the same
+  # purpose but it is problematic because it tends to cancel builds pushed
+  # directly to master instead of just PR builds (or the converse).
+  # credits: JuliaLang developers.
+  - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
+        https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
+        Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
+          throw "There are newer queued builds for this pull request, failing early." }
+  - ECHO "Filesystem root:"
+  - ps: "ls \"C:/\""
+
+  - ECHO "Installed SDKs:"
+  - ps: "ls \"C:/Program Files/Microsoft SDKs/Windows\""
+
+  # Install Python (from the official .msi of http://python.org) and pip when
+  # not already installed.
+  #- ps: if (-not(Test-Path($env:PYTHON))) { & appveyor\install.ps1 }
+  - ps: "appveyor\\install.ps1"
+
+   # https://github.com/aldanor/hdf5-rs/blob/master/appveyor.yml
+  - cmd: mkdir C:\HDF5
+   #- cmd: mkdir C:\HDF5\lib
+   #- ps: Invoke-WebRequest "https://github.com/kkirstein/hdf5-rs/releases/download/alpha/hdf5.dll" -OutFile "C:\HDF5\lib\hdf5.dll"
+   #- cmd: set PATH=C:\HDF5\bin;%PATH%
+   #- cmd: set PATH=C:\HDF5\bin;%PATH%
+   #- ps: Invoke-WebRequest "https://github.com/kkirstein/hdf5-rs/releases/download/alpha/hdf5.dll" -OutFile "C:\HDF5\lib\hdf5.dll"
+
+  # Prepend newly installed Python to the PATH of this build (this cannot be
+  # done from inside the powershell script as it would require to restart
+  # the parent CMD process).
+  - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%;C:\\HDF5"
+
+    # Check that we have the expected version and architecture for Python
+  - "python --version"
+  - "python -c \"import struct; print(struct.calcsize('P') * 8)\""
+
+  # Upgrade to the latest version of pip to avoid it displaying warnings
+  # about it being out of date.
+  - "pip install --disable-pip-version-check --user --upgrade pip"
+
+  - "%CMD_IN_ENV% pip install wheel"
+  - "%CMD_IN_ENV% pip install nose"
+
+  # Install the build dependencies of the project. If some dependencies contain
+  # compiled extensions and are not provided as pre-built wheel packages,
+  # pip will build them from source using the MSVC compiler matching the
+  # target Python version and architecture
+  #
+  - ps: if ($env:PYTHON_ARCH -eq "32") { `
+        Invoke-WebRequest "http://www.lfd.uci.edu/~gohlke/pythonlibs/djcobkfp/h5py-2.5.0-cp27-none-win32.whl" -OutFile "C:\HDF5\h5py-2.5.0-cp27-none-win32.whl";
+        pip install C:\HDF5\h5py-2.5.0-cp27-none-win32.whl `
+        } else { `
+        Invoke-WebRequest "http://www.lfd.uci.edu/~gohlke/pythonlibs/djcobkfp/h5py-2.5.0-cp27-none-win_amd64.whl" -OutFile "C:\HDF5\h5py-2.5.0-cp27-none-win_amd64.whl";
+        pip install C:\HDF5\h5py-2.5.0-cp27-none-win_amd64.whl `
+        }
+
+  - ps: if ($env:PYTHON_ARCH -eq "32") { `
+        Invoke-WebRequest "http://www.lfd.uci.edu/~gohlke/pythonlibs/djcobkfp/matplotlib-1.5.1-cp27-none-win32.whl" -OutFile "C:\HDF5\matplotlib-1.5.1-cp27-none-win32.whl";
+        pip install C:\HDF5\matplotlib-1.5.1-cp27-none-win32.whl `
+        } else { `
+        Invoke-WebRequest "http://www.lfd.uci.edu/~gohlke/pythonlibs/djcobkfp/matplotlib-1.5.1-cp27-none-win_amd64.whl" -OutFile "C:\HDF5\matplotlib-1.5.1-cp27-none-win_amd64.whl";
+        pip install C:\HDF5\matplotlib-1.5.1-cp27-none-win_amd64.whl `
+        }
+
+  #- ps: Invoke-WebRequest "https://pypi.python.org/packages/2.7/h/h5py/h5py-2.5.0.win32-py2.7.exe" -OutFile "C:\HDF5\h5py-2.5.0.win32-py2.7.exe"
+  #- ps: Invoke-WebRequest "http://www.lfd.uci.edu/~gohlke/pythonlibs/djcobkfp/h5py-2.5.0-cp27-none-win%PYTHON_ARCH%.whl" -OutFile "C:\HDF5\h5py-2.5.0-cp27-none-win%PYTHON_ARCH%.whl"
+    #- "%CMD_IN_ENV% C:\\HDF5\\h5py-2.5.0.win32-py2.7.exe"
+    #- "%CMD_IN_ENV% pip install C:\\HDF5\\h5py-2.5.0-cp27-none-win.whl"
+    #- "%CMD_IN_ENV% pip install -r requirements.txt"
+    #
+    #- ps: Invoke-WebRequest "http://www.lfd.uci.edu/~gohlke/pythonlibs/djcobkfp/matplotlib-1.5.1-cp27-none-win32.whl" -OutFile "C:\HDF5\matplotlib-1.5.1-cp27-none-win32.whl"
+    #- "%CMD_IN_ENV% C:\\HDF5\\h5py-2.5.0.win32-py2.7.exe"
+    #- "%CMD_IN_ENV% pip install C:\\HDF5\\matplotlib-1.5.1-cp27-none-win32.whl"
+
+
+  - ps: Invoke-WebRequest "http://www.lfd.uci.edu/~gohlke/pythonlibs/djcobkfp/seaborn-0.7.0-py2.py3-none-any.whl" -OutFile "C:\HDF5\seaborn-0.7.0-py2.py3-none-any.whl"
+    #- "%CMD_IN_ENV% C:\\HDF5\\h5py-2.5.0.win32-py2.7.exe"
+  - ps: pip install C:\\HDF5\\seaborn-0.7.0-py2.py3-none-any.whl
+
+
+build_script:
+  # Build the compiled extension
+  - "%CMD_IN_ENV% python setup.py build"
+  - "%CMD_IN_ENV% python setup.py install"
+
+test_script:
+  # Run the project tests
+  - "%CMD_IN_ENV% python setup.py nosetests"
+
+after_test:
+  # If tests are successful, create binary packages for the project.
+  - "%CMD_IN_ENV% python setup.py bdist_wheel"
+  - "%CMD_IN_ENV% python setup.py bdist_wininst"
+  - "%CMD_IN_ENV% python setup.py bdist_msi"
+  - ps: "ls dist"
+
+artifacts:
+  # Archive the generated packages in the ci.appveyor.com build report.
+  - path: dist\*
diff --git a/appveyor/install.ps1 b/appveyor/install.ps1
new file mode 100644
index 0000000..160ba55
--- /dev/null
+++ b/appveyor/install.ps1
@@ -0,0 +1,229 @@
+# Sample script to install Python and pip under Windows
+# Authors: Olivier Grisel, Jonathan Helmus, Kyle Kastner, and Alex Willmer
+# License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/
+
+$MINICONDA_URL = "http://repo.continuum.io/miniconda/"
+$BASE_URL = "https://www.python.org/ftp/python/"
+$GET_PIP_URL = "https://bootstrap.pypa.io/get-pip.py"
+$GET_PIP_PATH = "C:\get-pip.py"
+
+$PYTHON_PRERELEASE_REGEX = @"
+(?x)
+(?<major>\d+)
+\.
+(?<minor>\d+)
+\.
+(?<micro>\d+)
+(?<prerelease>[a-z]{1,2}\d+)
+"@
+
+
+function Download ($filename, $url) {
+    $webclient = New-Object System.Net.WebClient
+
+    $basedir = $pwd.Path + "\"
+    $filepath = $basedir + $filename
+    if (Test-Path $filename) {
+        Write-Host "Reusing" $filepath
+        return $filepath
+    }
+
+    # Download and retry up to 3 times in case of network transient errors.
+    Write-Host "Downloading" $filename "from" $url
+    $retry_attempts = 2
+    for ($i = 0; $i -lt $retry_attempts; $i++) {
+        try {
+            $webclient.DownloadFile($url, $filepath)
+            break
+        }
+        Catch [Exception]{
+            Start-Sleep 1
+        }
+    }
+    if (Test-Path $filepath) {
+        Write-Host "File saved at" $filepath
+    } else {
+        # Retry once to get the error message if any at the last try
+        $webclient.DownloadFile($url, $filepath)
+    }
+    return $filepath
+}
+
+
+function ParsePythonVersion ($python_version) {
+    if ($python_version -match $PYTHON_PRERELEASE_REGEX) {
+        return ([int]$matches.major, [int]$matches.minor, [int]$matches.micro,
+                $matches.prerelease)
+    }
+    $version_obj = [version]$python_version
+    return ($version_obj.major, $version_obj.minor, $version_obj.build, "")
+}
+
+
+function DownloadPython ($python_version, $platform_suffix) {
+    $major, $minor, $micro, $prerelease = ParsePythonVersion $python_version
+
+    if (($major -le 2 -and $micro -eq 0) `
+        -or ($major -eq 3 -and $minor -le 2 -and $micro -eq 0) `
+        ) {
+        $dir = "$major.$minor"
+        $python_version = "$major.$minor$prerelease"
+    } else {
+        $dir = "$major.$minor.$micro"
+    }
+
+    if ($prerelease) {
+        if (($major -le 2) `
+            -or ($major -eq 3 -and $minor -eq 1) `
+            -or ($major -eq 3 -and $minor -eq 2) `
+            -or ($major -eq 3 -and $minor -eq 3) `
+            ) {
+            $dir = "$dir/prev"
+        }
+    }
+
+    if (($major -le 2) -or ($major -le 3 -and $minor -le 4)) {
+        $ext = "msi"
+        if ($platform_suffix) {
+            $platform_suffix = ".$platform_suffix"
+        }
+    } else {
+        $ext = "exe"
+        if ($platform_suffix) {
+            $platform_suffix = "-$platform_suffix"
+        }
+    }
+
+    $filename = "python-$python_version$platform_suffix.$ext"
+    $url = "$BASE_URL$dir/$filename"
+    $filepath = Download $filename $url
+    return $filepath
+}
+
+
+function InstallPython ($python_version, $architecture, $python_home) {
+    Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home
+    if (Test-Path $python_home) {
+        Write-Host $python_home "already exists, skipping."
+        return $false
+    }
+    if ($architecture -eq "32") {
+        $platform_suffix = ""
+    } else {
+        $platform_suffix = "amd64"
+    }
+    $installer_path = DownloadPython $python_version $platform_suffix
+    $installer_ext = [System.IO.Path]::GetExtension($installer_path)
+    Write-Host "Installing $installer_path to $python_home"
+    $install_log = $python_home + ".log"
+    if ($installer_ext -eq '.msi') {
+        InstallPythonMSI $installer_path $python_home $install_log
+    } else {
+        InstallPythonEXE $installer_path $python_home $install_log
+    }
+    if (Test-Path $python_home) {
+        Write-Host "Python $python_version ($architecture) installation complete"
+    } else {
+        Write-Host "Failed to install Python in $python_home"
+        Get-Content -Path $install_log
+        Exit 1
+    }
+}
+
+
+function InstallPythonEXE ($exepath, $python_home, $install_log) {
+    $install_args = "/quiet InstallAllUsers=1 TargetDir=$python_home"
+    RunCommand $exepath $install_args
+}
+
+
+function InstallPythonMSI ($msipath, $python_home, $install_log) {
+    $install_args = "/qn /log $install_log /i $msipath TARGETDIR=$python_home"
+    $uninstall_args = "/qn /x $msipath"
+    RunCommand "msiexec.exe" $install_args
+    if (-not(Test-Path $python_home)) {
+        Write-Host "Python seems to be installed else-where, reinstalling."
+        RunCommand "msiexec.exe" $uninstall_args
+        RunCommand "msiexec.exe" $install_args
+    }
+}
+
+function RunCommand ($command, $command_args) {
+    Write-Host $command $command_args
+    Start-Process -FilePath $command -ArgumentList $command_args -Wait -Passthru
+}
+
+
+function InstallPip ($python_home) {
+    $pip_path = $python_home + "\Scripts\pip.exe"
+    $python_path = $python_home + "\python.exe"
+    if (-not(Test-Path $pip_path)) {
+        Write-Host "Installing pip..."
+        $webclient = New-Object System.Net.WebClient
+        $webclient.DownloadFile($GET_PIP_URL, $GET_PIP_PATH)
+        Write-Host "Executing:" $python_path $GET_PIP_PATH
+        & $python_path $GET_PIP_PATH
+    } else {
+        Write-Host "pip already installed."
+    }
+}
+
+
+function DownloadMiniconda ($python_version, $platform_suffix) {
+    if ($python_version -eq "3.4") {
+        $filename = "Miniconda3-3.5.5-Windows-" + $platform_suffix + ".exe"
+    } else {
+        $filename = "Miniconda-3.5.5-Windows-" + $platform_suffix + ".exe"
+    }
+    $url = $MINICONDA_URL + $filename
+    $filepath = Download $filename $url
+    return $filepath
+}
+
+
+function InstallMiniconda ($python_version, $architecture, $python_home) {
+    Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home
+    if (Test-Path $python_home) {
+        Write-Host $python_home "already exists, skipping."
+        return $false
+    }
+    if ($architecture -eq "32") {
+        $platform_suffix = "x86"
+    } else {
+        $platform_suffix = "x86_64"
+    }
+    $filepath = DownloadMiniconda $python_version $platform_suffix
+    Write-Host "Installing" $filepath "to" $python_home
+    $install_log = $python_home + ".log"
+    $args = "/S /D=$python_home"
+    Write-Host $filepath $args
+    Start-Process -FilePath $filepath -ArgumentList $args -Wait -Passthru
+    if (Test-Path $python_home) {
+        Write-Host "Python $python_version ($architecture) installation complete"
+    } else {
+        Write-Host "Failed to install Python in $python_home"
+        Get-Content -Path $install_log
+        Exit 1
+    }
+}
+
+
+function InstallMinicondaPip ($python_home) {
+    $pip_path = $python_home + "\Scripts\pip.exe"
+    $conda_path = $python_home + "\Scripts\conda.exe"
+    if (-not(Test-Path $pip_path)) {
+        Write-Host "Installing pip..."
+        $args = "install --yes pip"
+        Write-Host $conda_path $args
+        Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru
+    } else {
+        Write-Host "pip already installed."
+    }
+}
+
+function main () {
+    InstallPython $env:PYTHON_VERSION $env:PYTHON_ARCH $env:PYTHON
+    InstallPip $env:PYTHON
+}
+
+main
diff --git a/appveyor/run_with_env.cmd b/appveyor/run_with_env.cmd
new file mode 100644
index 0000000..5da547c
--- /dev/null
+++ b/appveyor/run_with_env.cmd
@@ -0,0 +1,88 @@
+:: To build extensions for 64 bit Python 3, we need to configure environment
+:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of:
+:: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1)
+::
+:: To build extensions for 64 bit Python 2, we need to configure environment
+:: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of:
+:: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0)
+::
+:: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific
+:: environment configurations.
+::
+:: Note: this script needs to be run with the /E:ON and /V:ON flags for the
+:: cmd interpreter, at least for (SDK v7.0)
+::
+:: More details at:
+:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows
+:: http://stackoverflow.com/a/13751649/163740
+::
+:: Author: Olivier Grisel
+:: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/
+::
+:: Notes about batch files for Python people:
+::
+:: Quotes in values are literally part of the values:
+::      SET FOO="bar"
+:: FOO is now five characters long: " b a r "
+:: If you don't want quotes, don't include them on the right-hand side.
+::
+:: The CALL lines at the end of this file look redundant, but if you move them
+:: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y
+:: case, I don't know why.
+@ECHO OFF
+
+SET COMMAND_TO_RUN=%*
+SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows
+SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf
+
+:: Extract the major and minor versions, and allow for the minor version to be
+:: more than 9.  This requires the version number to have two dots in it.
+SET MAJOR_PYTHON_VERSION=%PYTHON_VERSION:~0,1%
+IF "%PYTHON_VERSION:~3,1%" == "." (
+    SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,1%
+) ELSE (
+    SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,2%
+)
+
+:: Based on the Python version, determine what SDK version to use, and whether
+:: to set the SDK for 64-bit.
+IF %MAJOR_PYTHON_VERSION% == 2 (
+    SET WINDOWS_SDK_VERSION="v7.0"
+    SET SET_SDK_64=Y
+) ELSE (
+    IF %MAJOR_PYTHON_VERSION% == 3 (
+        SET WINDOWS_SDK_VERSION="v7.1"
+        IF %MINOR_PYTHON_VERSION% LEQ 4 (
+            SET SET_SDK_64=Y
+        ) ELSE (
+            SET SET_SDK_64=N
+            IF EXIST "%WIN_WDK%" (
+                :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/
+                REN "%WIN_WDK%" 0wdf
+            )
+        )
+    ) ELSE (
+        ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%"
+        EXIT 1
+    )
+)
+
+IF %PYTHON_ARCH% == 64 (
+    IF %SET_SDK_64% == Y (
+        ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture
+        SET DISTUTILS_USE_SDK=1
+        SET MSSdk=1
+        "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION%
+        "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release
+        ECHO Executing: %COMMAND_TO_RUN%
+        call %COMMAND_TO_RUN% || EXIT 1
+    ) ELSE (
+        ECHO Using default MSVC build environment for 64 bit architecture
+        ECHO Executing: %COMMAND_TO_RUN%
+        call %COMMAND_TO_RUN% || EXIT 1
+    )
+) ELSE (
+    ECHO Using default MSVC build environment for 32 bit architecture
+    ECHO Executing: %COMMAND_TO_RUN%
+    call %COMMAND_TO_RUN% || EXIT 1
+)
diff --git a/dist/poretools-0.3.0.win-amd64.exe b/dist/poretools-0.3.0.win-amd64.exe
deleted file mode 100644
index 2e23116..0000000
Binary files a/dist/poretools-0.3.0.win-amd64.exe and /dev/null differ
diff --git a/dist/poretools-0.3.1.win-amd64.exe b/dist/poretools-0.3.1.win-amd64.exe
deleted file mode 100644
index e8fe847..0000000
Binary files a/dist/poretools-0.3.1.win-amd64.exe and /dev/null differ
diff --git a/dist/poretools-0.5.0.win-amd64.exe b/dist/poretools-0.5.0.win-amd64.exe
deleted file mode 100644
index 7affd6e..0000000
Binary files a/dist/poretools-0.5.0.win-amd64.exe and /dev/null differ
diff --git a/dist/poretools.reg b/dist/poretools.reg
deleted file mode 100644
index 88887f1..0000000
--- a/dist/poretools.reg
+++ /dev/null
@@ -1,8 +0,0 @@
-Windows Registry Editor Version 5.00
-[HKEY_LOCAL_MACHINE\Software\Python]
-[HKEY_LOCAL_MACHINE\Software\Python\Pythoncore]
-[HKEY_LOCAL_MACHINE\Software\Python\Pythoncore\2.7]
-[HKEY_LOCAL_MACHINE\Software\Python\Pythoncore\2.7\InstallPath]
- @="C:\\Anaconda"
-[HKEY_LOCAL_MACHINE\Software\Python\Pythoncore\2.7\PythonPath]
- @="C:\\Anaconda;C:\\Anaconda\\Lib\\;C:\\Anaconda\\DLLs\\"
diff --git a/docs/conf.py b/docs/conf.py
index 54d2d1d..d698c80 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -17,7 +17,7 @@ import os
 sys.path.insert(0, os.path.abspath('../'))
 
 #from poretools import __version__ as version
-version = '0.5.0'
+version = '0.5.1'
 # -- General configuration -----------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
diff --git a/docs/content/_images/qualpos.png b/docs/content/_images/qualpos.png
new file mode 100644
index 0000000..6870ed7
Binary files /dev/null and b/docs/content/_images/qualpos.png differ
diff --git a/docs/content/examples.rst b/docs/content/examples.rst
index 04e0039..29a135c 100644
--- a/docs/content/examples.rst
+++ b/docs/content/examples.rst
@@ -2,6 +2,12 @@
 Usage examples
 ###############
 
+.. note::
+
+   In the following examples, ``test_data`` can be replaced with the directory containing the FAST5 files
+   from your own runs. If you are new to ONT sequencing, the ``test_data`` directory is shipped with ``poretools``
+   for experimentation.
+
 ===================
 poretools ``fastq``
 ===================
@@ -9,27 +15,35 @@ Extract sequences in FASTQ format from a set of FAST5 files.
 
 .. code-block:: bash
 
-    poretools fastq fast5/*.fast5
+    poretools fastq test_data/*.fast5
 
 Or, if there are too many files for your OS to do the wildcard expansion, just provide a directory.
``poretools`` will automatically find all of the FAST5 files in the directory.
 
 .. code-block:: bash
 
-    poretools fastq fast5/
+    poretools fastq test_data/
 
 
 Extract sequences in FASTQ format from a set of FAST5 files.
     
 .. code-block:: bash
 
-    poretools fastq fast5/
-    poretools fastq --min-length 5000 fast5/
-    poretools fastq --type all fast5/
-    poretools fastq --type fwd fast5/
-    poretools fastq --type rev fast5/
-    poretools fastq --type 2D fast5/
-    poretools fastq --type fwd,rev fast5/
+    poretools fastq test_data/
+    poretools fastq --min-length 5000 test_data/
+    poretools fastq --max-length 5000 test_data/
+    poretools fastq --type all test_data/
+    poretools fastq --type fwd test_data/
+    poretools fastq --type rev test_data/
+    poretools fastq --type 2D test_data/
+    poretools fastq --type fwd,rev test_data/
+
+
+A type of "best" will extract the 2D read, if it exists. If not, it will extract either the template or complement read, whichever is available and has a better average Phred score.
+
+.. code-block:: bash
+
+    poretools fastq --type best test_data/
 
 
Only extract sequences with more complement events than template. These are the so-called "high quality 2D reads" and are the most accurate sequences from a
@@ -37,7 +51,9 @@ given run.
 
 .. code-block:: bash
 
-    poretools fastq --type 2D --high-quality fast5/
+    poretools fastq --type 2D --high-quality test_data/
+
+The data in FASTQ format are written to standard output.
 
 ===================
 poretools ``fasta``
@@ -46,13 +62,17 @@ Extract sequences in FASTA format from a set of FAST5 files.
 
 .. code-block:: bash
 
-    poretools fasta fast5/
-    poretools fasta --min-length 5000 fast5/
-    poretools fasta --type all fast5/
-    poretools fasta --type fwd fast5/
-    poretools fasta --type rev fast5/
-    poretools fasta --type 2D fast5/
-    poretools fasta --type fwd,rev fast5/
+    poretools fasta test_data/
+    poretools fasta --min-length 5000 test_data/
+    poretools fasta --max-length 5000 test_data/
+    poretools fasta --type all test_data/
+    poretools fasta --type fwd test_data/
+    poretools fasta --type rev test_data/
+    poretools fasta --type 2D test_data/
+    poretools fasta --type fwd,rev test_data/
+    poretools fasta --type best test_data/
+
+The data in FASTA format are written to standard output.
 
 =====================
 poretools ``combine``
@@ -62,13 +82,13 @@ Create a tarball from a set of FAST5 (HDF5) files.
 .. code-block:: bash
 
     # plain tar (recommended for speed)
-    poretools combine -o foo.fast5.tar fast5/*.fast5
+    poretools combine -o foo.fast5.tar test_data/*.fast5
 
     # gzip
-    poretools combine -o foo.fast5.tar.gz fast5/*.fast5
+    poretools combine -o foo.fast5.tar.gz test_data/*.fast5
 
     # bzip2
-    poretools combine -o foo.fast5.tar.bz2 fast5/*.fast5
+    poretools combine -o foo.fast5.tar.bz2 test_data/*.fast5
 
 ========================
 poretools ``yield_plot``
@@ -77,7 +97,7 @@ Create a collector's curve reflecting the sequencing yield over time for a set o
 
 .. code-block:: bash
 
-    poretools yield_plot --plot-type reads fast5/
+    poretools yield_plot --plot-type reads test_data/
 
 The result should look something like:\
 
@@ -88,7 +108,7 @@ The second is the yield of base pairs over time:
 
 .. code-block:: bash
 
-    poretools yield_plot --plot-type basepairs fast5/
+    poretools yield_plot --plot-type basepairs test_data/
 
 The result should look something like:
     
@@ -102,18 +122,18 @@ Of course, you can save to PDF or PNG with `--saveas`:
     poretools yield_plot \
               --plot-type basepairs \
               --saveas foo.pdf\
-              fast5/
+              test_data/
 
     poretools yield_plot \
               --plot-type basepairs \
               --saveas foo.png\
-              fast5/
+              test_data/
 
 If you don't like the default aesthetics, try `--theme-bw`:
 
 .. code-block:: bash
 
-    poretools yield_plot --theme-bw fast5/
+    poretools yield_plot --theme-bw test_data/
 
 
 ======================
@@ -123,7 +143,7 @@ Make a "squiggle" plot of the signal over time for a given read or set of reads
 
 .. code-block:: bash
 
-    poretools squiggle fast5/foo.fast5
+    poretools squiggle test_data/foo.fast5
 
 
 The result should look something like:
@@ -135,7 +155,7 @@ If you don't like the default aesthetics, try `--theme-bw`:
 
 .. code-block:: bash
 
-    poretools squiggle --theme-bw fast5/
+    poretools squiggle --theme-bw test_data/
 
 
 Other options:
@@ -143,13 +163,13 @@ Other options:
 .. code-block:: bash
 
     # save as PNG
-    poretools squiggle --saveas png fast5/foo.fast5
+    poretools squiggle --saveas png test_data/foo.fast5
 
     # save as PDF
-    poretools squiggle --saveas pdf fast5/foo.fast5
+    poretools squiggle --saveas pdf test_data/foo.fast5
 
     # make a PNG for each FAST5 file in a directory
-    poretools squiggle --saveas png fast5/
+    poretools squiggle --saveas png test_data/
 
 ====================
 poretools ``winner``
@@ -158,12 +178,13 @@ Report the longest read among a set of FAST5 files.
 
 .. code-block:: bash
 
-    poretools winner fast5/
-    poretools winner --type all fast5/
-    poretools winner --type fwd fast5/
-    poretools winner --type rev fast5/
-    poretools winner --type 2D fast5/
-    poretools winner --type fwd,rev fast5/
+    poretools winner test_data/
+    poretools winner --type all test_data/
+    poretools winner --type fwd test_data/
+    poretools winner --type rev test_data/
+    poretools winner --type 2D test_data/
+    poretools winner --type fwd,rev test_data/
+    poretools winner --type best test_data/
 
 ===================
 poretools ``stats``
@@ -172,7 +193,7 @@ Collect read size statistics from a set of FAST5 files.
 
 .. code-block:: bash
 
-    poretools stats fast5/
+    poretools stats test_data/
     total reads 2286.000000
     total base pairs    8983574.000000
     mean    3929.822397
@@ -187,16 +208,16 @@ Plot a histogram of read sizes from a set of FAST5 files.
 
 .. code-block:: bash
 
-    poretools hist fast5/
-    poretools hist --min-length 1000 --max-length 10000 fast5/
+    poretools hist test_data/
+    poretools hist --min-length 1000 --max-length 10000 test_data/
 
-    poretools hist --num-bins 20 --max-length 10000 fast5/
+    poretools hist --num-bins 20 --max-length 10000 test_data/
 
 If you don't like the default aesthetics, try `--theme-bw`:
 
 .. code-block:: bash
 
-    poretools hist --theme-bw fast5/
+    poretools hist --theme-bw test_data/
 
 The result should look something like:
 
@@ -210,7 +231,7 @@ Look at the nucleotide composition of a set of FAST5 files.
 
 .. code-block:: bash
  
-    poretools nucdist fast5/
+    poretools nucdist test_data/
     A   78287   335291  0.233489714904
     C   75270   335291  0.224491561062
     T   92575   335291  0.276103444471
@@ -224,7 +245,7 @@ Look at the quality score composition of a set of FAST5 files.
 
 .. code-block:: bash
 
-    poretools qualdist fast5/
+    poretools qualdist test_data/
     !   0   83403   335291  0.248748102395
     "   1   46151   335291  0.137644613187
     #   2   47463   335291  0.141557632027
@@ -263,6 +284,20 @@ Look at the quality score composition of a set of FAST5 files.
     D   35  6   335291  1.78949032333e-05
     F   37  3   335291  8.94745161666e-06
 
+======================
+poretools ``qualpos``
+======================
+Produce a box-and-whisker plot of quality score distribution over positions in reads.
+
+.. code-block:: bash
+
+    poretools qualpos test_data/
+
+The result should look something like:
+
+.. image:: _images/qualpos.png
+    :width: 400pt 
+
 =====================
 poretools ``tabular``
 =====================
@@ -281,18 +316,36 @@ Extract the raw nanopore events from each FAST5 file.
 
 .. code-block:: bash
 
-    poretools events burn-in-run-2 | head -5
+    poretools events test_data/ | head -5
     file    strand  mean    start   stdv    length  model_state model_level move    p_model_state   mp_model_state  p_mp_model_state    p_A p_C p_G p_T raw_index
-    burn-in-run-2/ch100_file15_strand.fast5  template    56.4648513559   6595.744    1.62598948551   0.026   TGCAT   56.064011186    0   0.076552246287  TGCAT   0.076552246287  0.0980897489641 0.46074353628   0.320651683129  1.90528272165e-05   0
-    burn-in-run-2/ch100_file15_strand.fast5  template    53.2614042745   6595.77 1.12361695715   0.0262  GCATA   54.0674114279   1   0.162623875514  GCGAC   0.183337198021  0.437486003645  0.214306730736  0.335497877123  0.0103035924549 1
-    burn-in-run-2/ch100_file15_strand.fast5  template    51.0001271042   6595.7962   1.07380437991   0.1422  CATAG   52.1964606541   1   0.186606921109  CATAG   0.186606921109  0.424764995152  0.205766683286  0.0905615869544 0.277004168889  2
-    burn-in-run-2/ch100_file15_strand.fast5  template    49.6976788934   6595.9384   1.03634357984   0.0364  ATAGC   51.1117557194   1   0.181952967376  ATAGC   0.181952967376  0.296106771209  0.408638426765  0.0754069980523 0.217721405945  3
-    burn-in-run-2/ch100_file15_strand.fast5  template    51.7633085659   6595.9748   1.04743182078   0.0456  TAGCA   52.6955397413   1   0.192582310652  TAGCA   0.192582310652  0.250481934498  0.311756355221  0.311208716953  0.12343821687   4
+    test_data/2016_3_4_3507_1_ch120_read240_strand.fast5    template    58.3245290305   1559.89409031   1.34165996292   0.0146082337317 CGACTT  58.1304809188   0   0.0226559   CATCTT  0.0229866   0.284469    0.130683    0.137386    0.447461
+    test_data/2016_3_4_3507_1_ch120_read240_strand.fast5    template    50.1420877511   1559.90869854   0.921372775302  0.0348605577689 GACTTT  49.3934875964   1   0.0849836   GACTTT  0.0849836   0.257314    0.350541    0.101351    0.290794
+    test_data/2016_3_4_3507_1_ch120_read240_strand.fast5    template    47.5841029424   1559.9435591    0.771398562801  0.00763612217795    ACTTTG  48.2080162623   1   0.108899    TCTTTG  0.13079 0.000477931 0.00853333  0.306356    0.684632
+    test_data/2016_3_4_3507_1_ch120_read240_strand.fast5    template    51.5879264562   1559.95119522   0.684238307171  0.0112881806109 CTTTGA  52.7784154546   1   0.110625    CTTTGG  0.121103    4.69995e-06 0.00382846  0.0169048   0.979262
+
+Extract the pre-basecalled events from each FAST5 file. 
+
+.. code-block:: bash
+
+    poretools events --pre-basecalled test_data/ | head -5
+    file    strand  mean    start   stdv    length  model_state     model_level     move    p_model_state   mp_model_state  p_mp_model_state        p_A     p_C     p_G     p_T     raw_index
+    burn-in-run-2/ch100_file15_strand.fast5     pre_basecalled  51.4652695313   5352344 0.655003995591      35
+    burn-in-run-2/ch100_file15_strand.fast5     pre_basecalled  60.1776123047   5352379 1.05143911309       18
+    burn-in-run-2/ch100_file15_strand.fast5     pre_basecalled  48.9152374359   5352397 0.864834628834      67
+    burn-in-run-2/ch100_file15_strand.fast5     pre_basecalled  55.4002178596   5352464 1.75915620083       17    
 
 ===================
 poretools ``times``
 ===================
-Extract the start time of each detected molecule into tabular format.
+
+.. code-block:: bash
+
+    poretools times test_data/ | head -5
+    channel filename    read_length exp_starttime   unix_timestamp  duration    unix_timestamp_end  iso_timestamp   day hour    minute
+    120 test_data/2016_3_4_3507_1_ch120_read240_strand.fast5    5826    1457127309  1457128868  47  1457128915  2016-03-04T15:01:08-0700    04  15  01
+    120 test_data/2016_3_4_3507_1_ch120_read353_strand.fast5    3399    1457127309  1457129863  28  1457129891  2016-03-04T15:17:43-0700    04  15  17
+    120 test_data/2016_3_4_3507_1_ch120_read415_strand.fast5    2640    1457127309  1457130808  24  1457130832  2016-03-04T15:33:28-0700    04  15  33
+    120 test_data/2016_3_4_3507_1_ch120_read418_strand.fast5    3487    1457127309  1457130851  31  1457130882  2016-03-04T15:34:11-0700    04  15  34
 
 =======================
 poretools ``occupancy``
@@ -301,9 +354,45 @@ Plot the throughput performance of each pore on the flowcell during a given sequ
 
 .. code-block:: bash
 
-    poretools occupancy fast5/
+    poretools occupancy test_data/
 
 The result should look something like:
 
 .. image:: _images/occupancy.png
-    :width: 400pt    
+    :width: 400pt  
+
+
+===================
+poretools ``index``
+===================
+Tabulate all file location info and metadata, such as ASIC ID and temperature, from a set of FAST5 files.
+
+.. code-block:: bash
+
+    poretools index test_data | head -5 | column -t
+    source_filename                                       template_fwd_length  complement_rev_length  2d_length  asic_id     asic_temp  heatsink_temp  channel  exp_start_time  exp_start_time_string_date  exp_start_time_string_time  start_time  start_time_string_date  start_time_string_time  duration  fast5_version
+    test_data/2016_3_4_3507_1_ch120_read240_strand.fast5  5826                 5011                   5079       3571011476  30.37      36.99          120      1457127309      2016-Mar-04                 (Fri)                       14:35:09    1457128868              2016-Mar-04             (Fri)     15:01:08       47  metrichor1.16
+    test_data/2016_3_4_3507_1_ch120_read353_strand.fast5  3399                 2962                   2940       3571011476  30.37      36.99          120      1457127309      2016-Mar-04                 (Fri)                       14:35:09    1457129863              2016-Mar-04             (Fri)     15:17:43       28  metrichor1.16
+    test_data/2016_3_4_3507_1_ch120_read415_strand.fast5  2640                 2244                   2428       3571011476  30.37      36.99          120      1457127309      2016-Mar-04                 (Fri)                       14:35:09    1457130808              2016-Mar-04             (Fri)     15:33:28       24  metrichor1.16
+    test_data/2016_3_4_3507_1_ch120_read418_strand.fast5  3487                 2950                   3384       3571011476  30.37      36.99          120      1457127309      2016-Mar-04                 (Fri)                       14:35:09    1457130851              2016-Mar-04             (Fri)     15:34:11       31  metrichor1.16
+
+
+======================
+poretools ``metadata``
+======================
+Extract the metadata from a FAST5 file.
+
+.. code-block:: bash
+
+    poretools metadata  013731_11rx_v2_3135_1_ch20_file19_strand.fast5
+
+    asic_id asic_temp   heatsink_temp
+    31037   28.11   37.88
+
+    poretools metadata --read  013731_11rx_v2_3135_1_ch20_file19_strand.fast5
+    filename    scaling_used    abasic_peak_height  hairpin_polyt_level median_before   start_time  read_id read_number hairpin_peak_height abasic_found    abasic_event_index  duration    start_mux   hairpin_found   hairpin_event_index
+    013731_11rx_v2_3135_1_ch20_file19_strand.fast5    1   124.31769966    0.413218809334  226.393825112   4648221 3b4e45bf-6d42-45bc-9314-1d8a630971c2    19  125.783167256   1   2   195322  4   1   1478
+
+
+
+
diff --git a/docs/content/history.rst b/docs/content/history.rst
new file mode 100644
index 0000000..07ac13e
--- /dev/null
+++ b/docs/content/history.rst
@@ -0,0 +1,11 @@
+###############
+Release History
+###############
+
+Version 0.6.0 (29-Aug-2016)
+============================
+0. Added new ``organise`` command to place FAST5 files into a useful folder hierarchy
+1. Updated the logic for event timing to handle both R9 and earlier FAST5 files.
+2. Added a "best" option to the ``fasta`` and ``fastq`` tools to identify the best sequence for a read (of 2d, template, complement).
+3. Added R9 RNN support.
+4. Various updates to API to accommodate the R9 changes made to the HDF5 structure.
\ No newline at end of file
diff --git a/docs/content/installation.rst b/docs/content/installation.rst
index 3970027..533252f 100644
--- a/docs/content/installation.rst
+++ b/docs/content/installation.rst
@@ -23,26 +23,27 @@ Install as a plain old user who has root access:
 
 	sudo python setup.py install
 
-Install as a plain old who lacks ``sudo`` priveleges:
+Install as a plain old user who lacks ``sudo`` privileges:
 
 .. code-block:: bash
 
 	# details: https://docs.python.org/2/install/index.html#alternate-installation-the-user-scheme
 	python setup.py install --user
-	
+
 	# now update your PATH such that it includes the directory to which poretools was just copied.
 	# look for a line in the installation log like: Installing poretools script to /home/arq5x/.local/bin
         # in this case, I would either add that path to the PATH environment variable for the current session:
         export PATH=$PATH:/home/arq5x/.local/bin
-        
+
         # or, better yet add it to your .bashrc file.
         # at this point you should be able to run the poretools executable from anywhere on your system.
         poretools --help
+
 =================================
 Installing on Windows with MinKNOW installed
 =================================
 
-MinKNOW installs the Anaconda distribution of Python, which means that h5py is already installed.
+MinKNOW installs the Anaconda distribution of Python, which means that h5py, matplotlib, and pandas are already installed.
 
 However, currently MinKNOW does not update the Windows registry to specify that Anaconda is the default version of Python, which makes installing packages tricky. To address this, some changes need to be made to the registry. This can be fixed by downloading the following file:
 
@@ -50,42 +51,18 @@ However, currently MinKNOW does not update the Windows registry to specify that
 
 Ensure it is named 'poretools.reg' and then run it (by double-clicking). Windows will prompt you about making changes to the registry, which you should agree to.
 
-The only additional dependency that is required is rpy2 and R.
-
-Download rpy2 from the pre-built binary page at: <http://www.lfd.uci.edu/~gohlke/pythonlibs/>. You want the version for Python 2.7 on 64-bit Windows. Run the installer.
-
-Then, to install poretools, simply download and run the Windows installer:
-
-        <https://github.com/arq5x/poretools/blob/master/dist/poretools-0.3.1.win-amd64.exe?raw=true>
+Now, you need to install seaborn, which is the plotting package that ``poretools`` uses as a replacement for R and rpy2 as of version ``0.5.1``.
 
-==================================
-Plotting with R on Windows
-==================================
+    conda install seaborn
 
-If you wish to use the R plots (experimental, on Windows) you also need to:
+If conda cannot install seaborn, you could consider installing ``pip`` and running:
 
-Download R for Windows from: <http://cran.r-project.org/bin/windows/base/>
+    pip install seaborn
 
-Run the installer, then start up R and install ggplot2:
-
-.. code-block:: R
-
-	install.packages("ggplot2")
-
-You need to set two environment variables to run poretools currently:
-
-.. code-block:: bash
-
-	set R_HOME=c:\Program Files\R\R-3.1.1
-	set R_USER=c:\Users\MY USER\Documents
-
-You may also need to add the following directory to your PATH:
+Then, to install poretools, simply download and run the Windows installer:
 
-.. code-block:: bash
+        <https://github.com/arq5x/poretools/blob/master/dist/poretools-0.5.1.win-amd64.exe?raw=true>
 
-        C:\Program Files\R\R-3.1.1\bin\x64
-        
-Instructions for updating your PATH on Windows can be found here: http://geekswithblogs.net/renso/archive/2009/10/21/how-to-set-the-windows-path-in-windows-7.aspx
 
 =================================
 Installing on OS X
@@ -102,10 +79,17 @@ To install HomeBrew, you run the following command (lifted from the HomeBrew sit
 Using HomeBrew, install HDF5 from the HomeBrew Science "tap";
 
 .. code-block:: bash
-	
-	brew tap homebrew/science 
+
+	brew tap homebrew/science
 	brew install hdf5
 
+You will also need Cython and numpy packages (if they are not already installed):
+
+.. code-block:: bash
+
+	pip install cython
+	pip install numpy
+	
 Now, you will need to install the R statistical analysis software (you may already have this...). The `CRAN <http://cran.r-project.org/bin/macosx/>`_ website houses automatic installation packages for different versions of OS X.  Here are links to such packages for `Snow Leopard and higher <http://cran.r-project.org/bin/macosx/R-3.1.1-snowleopard.pkg>`_ as well as `Mavericks <http://cran.r-project.org/bin/macosx/R-3.1.1-mavericks.pkg>`_.
 
 At this point, you can install poretools.
@@ -154,29 +138,7 @@ Or, for Ubuntu 14.04:
 
 .. code-block:: bash
 
-	deb http://www.stats.bris.ac.uk/R/bin/linux/ubuntu trusty/ 
-
-Then, run the following commands to install R 3.0:
-
-.. code-block:: bash
-
-	sudo apt-get update
-	sudo apt-get install r-base python-rpy2
-
-Start R
-
-.. code-block:: bash
-
-	R
-
-Then run the following commands within the R programme, and follow any prompts:
-
-.. code-block:: R
-
-	options("repos" = c(CRAN = "http://cran.rstudio.com/"))
-	install.packages("codetools")
-	install.packages("MASS")
-	install.packages("ggplot2")
+	deb http://www.stats.bris.ac.uk/R/bin/linux/ubuntu trusty/
 
 Then install poretools, finally:
 
@@ -200,12 +162,13 @@ Via docker
 Build the docker container yourself (preferred):
 
 .. code-block:: bash
+
 	git clone https://github.com/arq5x/poretools
 	cd poretools
 	docker build -t poretools .
 	docker run poretools --help
 
-Or use the pre-built `image from Docker Hub <https://registry.hub.docker.com/u/stephenturner/poretools/>`_: 
+Or use the pre-built `image from Docker Hub <https://registry.hub.docker.com/u/stephenturner/poretools/>`_:
 
 .. code-block:: bash
 
diff --git a/docs/index.rst b/docs/index.rst
index 65f5d19..37368bd 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -44,18 +44,20 @@ Table of contents
    content/help
    content/notebook
    content/examples
+   content/history
 
 =================
 Requirements
 =================
   - HDF5 >= 1.8.7 (http://www.hdfgroup.org/HDF5/)
-  - R >= 3.0.0
   - Python >= 2.7
-  - rpy2 >= 2.4.2
   - h5py >= 2.0.0
+  - matplotlib
+  - seaborn
+  - pandas
 
 .. note::
-    Please note that Anaconda and Python(x,y) already have all these dependencies installed, other than R/Rpy2:
+    Please note that Anaconda and Python(x,y) already have all these dependencies installed:
     Anaconda (Linux, Windows, OS X): https://store.continuum.io/cshop/anaconda/ Python(x,y) (Windows): https://code.google.com/p/pythonxy/
 
 
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..6966869
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1 @@
+sphinx
diff --git a/poretools/Event.py b/poretools/Event.py
index 877ad17..1363fef 100644
--- a/poretools/Event.py
+++ b/poretools/Event.py
@@ -7,20 +7,62 @@ class Event(object):
 	"""
 	def __init__(self, row):
 		self.row = row
-		self.mean = row['mean']
-		self.start = row['start']
-		self.stdv = row['stdv']
-		self.length = row['length']
-		self.model_state = row['model_state']
-		self.model_level = row['model_level']
-		self.move = row['move']
-		self.p_model_state = row['p_model_state']
-		self.mp_state = row['mp_state']
-		self.p_mp_state = row['p_mp_state']
-		self.p_A = row['p_A']
-		self.p_C = row['p_C']
-		self.p_G = row['p_G']
-		self.p_T = row['p_T']
+		try:
+			self.mean = row['mean']
+		except Exception:
+			self.mean = ""
+		try:
+			self.start = row['start']
+		except Exception:
+			self.start = ""
+		try:
+			self.stdv = row['stdv']
+		except Exception:
+			self.stdv = ""
+		try:
+			self.length = row['length']
+		except Exception:
+			self.length = ""
+		try:
+			self.model_state = row['model_state']
+		except Exception:
+			self.model_state = ""
+		try:
+			self.model_level = row['model_level']
+		except Exception:
+			self.model_level = ""
+		try:
+			self.move = row['move']
+		except Exception:
+			self.move = ""
+		try:
+			self.p_model_state = row['p_model_state']
+		except Exception:
+			self.p_model_state = ""
+		try:
+			self.mp_state = row['mp_state']
+		except Exception:
+			self.mp_state = ""
+		try:
+			self.p_mp_state = row['p_mp_state']
+		except Exception:
+			self.p_mp_state = ""
+		try:
+			self.p_A = row['p_A']
+		except Exception:
+			self.p_A = ""
+		try:
+			self.p_C = row['p_C']
+		except Exception:
+			self.p_C = ""
+		try:
+			self.p_G = row['p_G']
+		except Exception:
+			self.p_G = ""
+		try:
+			self.p_T = row['p_T']
+		except Exception:
+			self.p_T = ""
 
 	def __repr__(self):
 		return '\t'.join([str(s) for s in [self.mean, self.start, self.stdv,
diff --git a/poretools/Fast5File.py b/poretools/Fast5File.py
index 72bfa41..5421fdf 100644
--- a/poretools/Fast5File.py
+++ b/poretools/Fast5File.py
@@ -14,9 +14,21 @@ logger = logging.getLogger('poretools')
 import formats
 from Event import Event
 
-fastq_paths = {'template' : '/Analyses/Basecall_2D_000/BaseCalled_template',
-               'complement' : '/Analyses/Basecall_2D_000/BaseCalled_complement',
-               'twodirections' : '/Analyses/Basecall_2D_000/BaseCalled_2D'}
+fastq_paths = {
+  'closed' : {},
+  'r9rnn' :         { 'template' : '/Analyses/Basecall_RNN_1D_%03d/BaseCalled_template'},
+  'metrichor1.16' : { 'template' : '/Analyses/Basecall_1D_%03d/BaseCalled_template',
+                      'complement' : '/Analyses/Basecall_1D_%03d/BaseCalled_complement',
+                      'twodirections' : '/Analyses/Basecall_2D_%03d/BaseCalled_2D',
+                      'pre_basecalled' : '/Analyses/EventDetection_000/Reads/'
+                    },
+  'classic' :       { 'template' : '/Analyses/Basecall_2D_%03d/BaseCalled_template',
+                      'complement' : '/Analyses/Basecall_2D_%03d/BaseCalled_complement',
+                      'twodirections' : '/Analyses/Basecall_2D_%03d/BaseCalled_2D',
+                      'pre_basecalled' : '/Analyses/EventDetection_000/Reads/'
+                    },
+  'prebasecalled' : {'pre_basecalled' : '/Analyses/EventDetection_000/Reads/'}
+}
 
 FAST5SET_FILELIST = 0
 FAST5SET_DIRECTORY = 1
@@ -24,15 +36,50 @@ FAST5SET_SINGLEFILE = 2
 FAST5SET_TARBALL = 3
 PORETOOLS_TMPDIR = '.poretools_tmp'
 
+
+class Fast5DirHandler(object):
+
+    patterns = ["*.fast5"]
+
+    def __init__(self, dir):
+        self.dir = dir
+        self.files = []
+        super(Fast5DirHandler, self).__init__()
+
+        if os.path.isdir(self.dir):
+            pattern = self.dir + '/' + '*.fast5'
+            files = glob.glob(pattern)
+            self.files = files
+
+    def process(self, event):
+        self.files.append(event.src_path)
+
+    def on_created(self, event):
+        self.process(event)
+
+    def clear(self):
+        self.files = []
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        if len(self.files) > 0:
+            return self.files.pop(0)
+        else:
+            raise StopIteration()
+
+
 class Fast5FileSet(object):
 
-	def __init__(self, fileset):
+	def __init__(self, fileset, group=0):
 		if isinstance(fileset, list):
 			self.fileset = fileset
 		elif isinstance(fileset, str):
 			self.fileset = [fileset]
 		self.set_type = None
 		self.num_files_in_set = None
+		self.group = group
 		self._extract_fast5_files()
 
 	def get_num_files(self):
@@ -48,7 +95,7 @@ class Fast5FileSet(object):
 
 	def next(self):
 		try:
-			return Fast5File(self.files.next())
+			return Fast5File(self.files.next(), self.group)
 		except Exception as e:
 			# cleanup our mess
 			if self.set_type == FAST5SET_TARBALL:
@@ -126,9 +173,14 @@ class TarballFileIterator:
 
 class Fast5File(object):
 
-	def __init__(self, filename):
+	def __init__(self, filename, group=0):
 		self.filename = filename
+		self.group = group
 		self.is_open = self.open()
+		if self.is_open:
+			self.version = self.guess_version()
+		else:
+			self.version = 'closed'
 
 		self.fastas = {}
 		self.fastqs = {}
@@ -141,8 +193,13 @@ class Fast5File(object):
 		self.have_fastas = False
 		self.have_templates = False
 		self.have_complements = False
+		self.have_pre_basecalled = False
 		self.have_metadata = False
 
+
+	def __del__(self):
+		self.close()
+
 	####################################################################
 	# Public API methods
 	####################################################################
@@ -157,6 +214,30 @@ class Fast5File(object):
 		except Exception, e:
 			logger.warning("Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
 			return False
+
+	def guess_version(self):
+		"""
+		Try to guess where the template/complement basecall blocks live.
+		"""
+		try:
+			self.hdf5file["/Analyses/Basecall_RNN_1D_%03d/BaseCalled_template" % (self.group)]
+			return 'r9rnn'
+		except KeyError:
+			pass
+
+		try:
+			self.hdf5file["/Analyses/Basecall_2D_%03d/BaseCalled_template" % (self.group)]
+			return 'classic'
+		except KeyError:
+			pass
+
+		try:
+			self.hdf5file["/Analyses/Basecall_1D_%03d/BaseCalled_template" % (self.group)]
+			return 'metrichor1.16'
+		except KeyError:
+			pass
+
+		return 'prebasecalled'
 			
 	def close(self):
 		"""
@@ -164,6 +245,7 @@ class Fast5File(object):
 		"""
 		if self.is_open:
 			self.hdf5file.close()
+			self.is_open = False
 
 	def has_2D(self):
 		"""
@@ -187,7 +269,6 @@ class Fast5File(object):
 			self._extract_fastqs_from_fast5()
 			self.have_fastqs = True
 
-		# TODO "best". What is "best"?
 		fqs = []
 		if choice == "all":
 			for fastq in self.fastqs:
@@ -201,6 +282,8 @@ class Fast5File(object):
 		elif choice == "fwd,rev":
 				fqs.append(self.fastqs.get('template'))
 				fqs.append(self.fastqs.get('complement'))
+		elif choice == "best":
+				fqs.append(self.fastqs.get(self.get_best_type()))
 
 		return fqs
 
@@ -214,7 +297,6 @@ class Fast5File(object):
 			self._extract_fastas_from_fast5()
 			self.have_fastas = True
 
-		# TODO "best". What is "best"?
 		fas = []
 		if choice == "all":
 			for fasta in self.fastas:
@@ -228,6 +310,11 @@ class Fast5File(object):
 		elif choice == "fwd,rev":
 				fas.append(self.fastas.get('template'))
 				fas.append(self.fastas.get('complement'))
+		elif choice == "best":
+				if self.have_fastqs is False:
+					self._extract_fastqs_from_fast5()
+					self.have_fastqs = True
+				fas.append(self.fastas.get(self.get_best_type()))
 
 		return fas
 
@@ -297,6 +384,16 @@ class Fast5File(object):
 		
 		return self.complement_events
 
+	def get_pre_basecalled_events(self):
+		"""
+		Return the table of pre-basecalled events
+		"""
+		if self.have_pre_basecalled is False:
+			self._extract_pre_basecalled_events()
+			self.have_pre_basecalled = True
+
+		return self.pre_basecalled_events		
+
 	####################################################################
 	# Flowcell Metadata methods
 	####################################################################
@@ -311,7 +408,7 @@ class Fast5File(object):
 			self.have_metadata = True
 
 		try:
-			return self.keyinfo['tracking_id'].attrs['exp_start_time']
+			return int(self.keyinfo['tracking_id'].attrs['exp_start_time'])
 		except:
 			return None
 
@@ -334,10 +431,23 @@ class Fast5File(object):
 		except:
 			return None
 
-	def find_read_number_block(self):
-		path = "/Analyses/Basecall_2D_000"
+	def find_read_number_block_link(self):
+		"""
+		Old-style FAST5/HDF5 structure:
+		Inside /Analyses/Basecall_XXXX there is an 'InputEvents'
+		link that points to the location of the Read in the HDF5 file.
+
+		Return the Read's node if found, or None if not found.
+		"""
+		if self.version == 'classic':
+			path = "/Analyses/Basecall_2D_000"
+		else:
+			path = "/Analyses/Basecall_1D_000"
+
 		basecall = self.hdf5file[path]
 		path = basecall.get('InputEvents', getlink=True)
+		if path is None:
+			return None
 
 		# the soft link target seems broken?
 		newpath = "/" + "/".join(path.path.split("/")[:-1])
@@ -346,8 +456,55 @@ class Fast5File(object):
 
 		return node
 
+	def hdf_internal_error(self, reason):
+		"""Report an error and exit in case of an invalid
+		(or unknown) HDF5 structure. Hurrah for ONT!"""
+		msg = """poretools internal error in file '%s':
+%s
+Please report this error (with the offending file) to:
+    https://github.com/arq5x/poretools/issues""" % (self.filename, reason)
+		sys.exit(msg)
+
+	def find_read_number_block_fixed_raw(self):
+		"""
+		New-style FAST5/HDF5 structure:
+		There is a fixed 'Raw/Reads' node with only one 'read_NNN' item
+		inside it (no more 'InputEvents' link).
+
+		Return the Read's node if found, or None if not found.
+		"""
+		raw_reads = self.hdf5file.get('Raw/Reads')
+		if raw_reads is None:
+			return None
+
+		reads = raw_reads.keys()
+		if len(reads)==0:
+			self.hdf_internal_error("Raw/Reads group does not contain any items")
+		if len(reads)>1:
+			# This should not happen, based on information from ONT developers.
+			self.hdf_internal_error("Raw/Reads group contains more than one item")
+		path = 'Raw/Reads/%s' % ( reads[0] )
+		node = self.hdf5file.get(path)
+		if node is None:
+			self.hdf_internal_error("Failed to get HDF5 item '%s'"% (path))
+		return node
+
+	def find_read_number_block(self):
+		"""Return the 'Read_NNN' information node; exits via
+		hdf_internal_error() if no read block can be found."""
+		node = self.find_read_number_block_link()
+		if node is not None:
+			return node
+
+		node = self.find_read_number_block_fixed_raw()
+		if node is not None:
+			return node
+
+		# Couldn't find the node, bail out.
+		self.hdf_internal_error("unknown HDF5 structure: can't find read block item")
+
 	def find_event_timing_block(self):
-		path = "/Analyses/Basecall_2D_000/BaseCalled_template"
+		path = fastq_paths[self.version]['template'] % (self.group)
 		try:
 			node = self.hdf5file[path]
 			path = node.get('Events')
@@ -371,6 +528,10 @@ class Fast5File(object):
 	def get_duration(self):
 		node = self.find_event_timing_block()
 		if node:
+			# NOTE: 'duration' in the HDF5 file is a floating-point number
+			#       and can be less than one, in which case int() returns 0.
+			# TODO: consider supporting floating-point durations, or at
+			#       least rounding values instead of truncating to int.
 			return int(node.attrs['duration'])
 		return None
 
@@ -388,7 +549,10 @@ class Fast5File(object):
 		start_time = self.get_start_time()
 		duration = self.get_duration()
 
-		if start_time and duration:
+		# 'duration' can be zero and still be valid
+		# (when the template lasted less than one second),
+		# so test for None explicitly rather than relying on truthiness.
+		if start_time and (duration is not None):
 			return start_time + duration
 		else:
 			return None
@@ -402,7 +566,7 @@ class Fast5File(object):
 			self.have_metadata = True
 
 		try:
-			return self.keyinfo['tracking_id'].attrs['version_name']
+			return self.keyinfo['context_tags'].attrs['version_name']
 		except:
 			return None
 
@@ -488,22 +652,58 @@ class Fast5File(object):
 			self._get_metadata()
 			self.have_metadata = True
 
+	def get_host_name(self):
+		"""
+		Return the MinKNOW host computer name.
+		"""
+		if self.have_metadata is False:
+			self._get_metadata()
+			self.have_metadata = True
+
+		try:
+			return self.keyinfo['tracking_id'].attrs['hostname']
+		except:
+			return None
+
 	def get_device_id(self):
 		"""
 		Return the flowcell's device id.
 		"""
+
+		if self.have_metadata is False:
+			self._get_metadata()
+			self.have_metadata = True
+
 		try:
 			return self.keyinfo['tracking_id'].attrs['device_id']
 		except:
 			return None
 
+	def get_sample_name(self):
+		"""
+		Return the user supplied sample name
+		"""
+
+		if self.have_metadata is False:
+			self._get_metadata()
+			self.have_metadata = True
+
+		try:
+			return self.keyinfo['context_tags'].attrs['user_filename_input']
+		except Exception, e:
+			return None
+
 
 	def get_template_events_count(self):
 		"""
 		Pull out the event count for the template strand
 		"""
 		try:
-			table = self.hdf5file[fastq_paths['template']]
+			table = self.hdf5file[fastq_paths[self.version]['template'] % self.group]
 			return len(table['Events'][()])
 		except Exception, e:
 			return 0
@@ -513,18 +713,42 @@ class Fast5File(object):
 		Pull out the event count for the complementary strand
 		"""
 		try:
-			table = self.hdf5file[fastq_paths['complement']]
+			table = self.hdf5file[fastq_paths[self.version]['complement'] % self.group]
 			return len(table['Events'][()])
 		except Exception, e:
 			return 0
 
 	def is_high_quality(self):
-		if self.get_complement_events_count() > \
+		if self.get_complement_events_count() >= \
 		   self.get_template_events_count():
 			return True
 		else:
 			return False
 
+	def get_best_type(self):
+		"""
+		Returns the type with the anticipated highest quality:
+		'twodirections', 'template', 'complement' or None.
+		"""
+		try:
+			if 'twodirections' in self.fastqs:
+				return 'twodirections'
+			fwd = 'template' in self.fastqs
+			rev = 'complement' in self.fastqs
+			if fwd and not rev:
+				return 'template'
+			elif rev and not fwd:
+				return 'complement'
+			else:
+				fwd_err_rate = self.fastqs['template'].est_error_rate()
+				rev_err_rate = self.fastqs['complement'].est_error_rate()
+				if fwd_err_rate <= rev_err_rate:
+					return 'template'
+				else:
+					return 'complement'
+		except Exception, e:
+			return None
+
 	####################################################################
 	# Private API methods
 	####################################################################
@@ -533,11 +757,11 @@ class Fast5File(object):
 		"""
 		Return the sequence in the FAST5 file in FASTQ format
 		"""
-		for id, h5path in fastq_paths.iteritems(): 
+		for id, h5path in fastq_paths[self.version].iteritems(): 
 			try:
-				table = self.hdf5file[h5path]
+				table = self.hdf5file[h5path % self.group]
 				fq = formats.Fastq(table['Fastq'][()])
-				fq.name += "_" + id + ":" + self.filename
+				fq.name += " " + self.filename
 				self.fastqs[id] = fq
 			except Exception, e:
 				pass
@@ -546,11 +770,11 @@ class Fast5File(object):
 		"""
 		Return the sequence in the FAST5 file in FASTA format
 		"""
-		for id, h5path in fastq_paths.iteritems(): 
+		for id, h5path in fastq_paths[self.version].iteritems(): 
 			try:
-				table = self.hdf5file[h5path]
+				table = self.hdf5file[h5path % self.group]
 				fa = formats.Fasta(table['Fastq'][()])
-				fa.name += "_" + id + " " + self.filename
+				fa.name += " " + self.filename
 				self.fastas[id] = fa
 			except Exception, e:
 				pass
@@ -560,7 +784,7 @@ class Fast5File(object):
 		Pull out the event information for the template strand
 		"""
 		try:
-			table = self.hdf5file[fastq_paths['template']]
+			table = self.hdf5file[fastq_paths[self.version]['template'] % self.group]
 			self.template_events = [Event(x) for x in table['Events'][()]]
 		except Exception, e:
 			self.template_events = []
@@ -570,11 +794,24 @@ class Fast5File(object):
 		Pull out the event information for the complementary strand
 		"""
 		try:
-			table = self.hdf5file[fastq_paths['complement']]
+			table = self.hdf5file[fastq_paths[self.version]['complement'] % self.group]
 			self.complement_events = [Event(x) for x in table['Events'][()]]
 		except Exception, e:
 			self.complement_events = []
 
+	def _extract_pre_basecalled_events(self):
+		"""
+		Pull out the pre-basecalled event information.
+		"""
+		try:
+			table = self.hdf5file[fastq_paths[self.version]['pre_basecalled']]
+			events = []
+			for read in table:
+				events.extend(table[read]["Events"][()])
+			self.pre_basecalled_events = [Event(x) for x in events]
+		except Exception, e:
+			self.pre_basecalled_events = []
+
 	def _get_metadata(self):
 		try:
 			self.keyinfo = self.hdf5file['/UniqueGlobalKey']
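The version-specific entries in ``fastq_paths`` above are plain ``%``-format strings, so resolving a concrete HDF5 path is just string interpolation with the basecall group number. A standalone illustration (the dict is repeated here rather than imported, and only two layouts are shown):

.. code-block:: python

    # Illustration: resolving a versioned basecall path for a given group number.
    fastq_paths = {
        'classic':       {'template': '/Analyses/Basecall_2D_%03d/BaseCalled_template'},
        'metrichor1.16': {'template': '/Analyses/Basecall_1D_%03d/BaseCalled_template'},
    }

    def template_path(version, group):
        """Return the HDF5 path of the template basecall for this layout version."""
        return fastq_paths[version]['template'] % group

    print(template_path('classic', 0))         # /Analyses/Basecall_2D_000/BaseCalled_template
    print(template_path('metrichor1.16', 1))   # /Analyses/Basecall_1D_001/BaseCalled_template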
diff --git a/poretools/Fast5File_pytables.py b/poretools/Fast5File_pytables.py
deleted file mode 100644
index 3794bd3..0000000
--- a/poretools/Fast5File_pytables.py
+++ /dev/null
@@ -1,504 +0,0 @@
-import sys
-import os
-import glob
-import tarfile
-import shutil
-import tables as pyhdf5
-
-#logging
-import logging
-logger = logging.getLogger('poretools')
-
-
-# poretools imports
-import formats
-from Event import Event
-
-fastq_paths = {'template' : '/Analyses/Basecall_2D_000/BaseCalled_template',
-               'complement' : '/Analyses/Basecall_2D_000/BaseCalled_complement',
-               'twodirections' : '/Analyses/Basecall_2D_000/BaseCalled_2D'}
-
-FAST5SET_FILELIST = 0
-FAST5SET_DIRECTORY = 1
-FAST5SET_SINGLEFILE = 2
-FAST5SET_TARBALL = 3
-PORETOOOLS_TMPDIR = '.poretools_tmp'
-
-class Fast5FileSet(object):
-
-	def __init__(self, fileset):
-		if isinstance(fileset, list):
-			self.fileset = fileset
-		elif isinstance(fileset, str):
-			self.fileset = [fileset]
-		self.set_type = None
-		self.num_files_in_set = None
-		self._extract_fast5_files()
-
-	def get_num_files(self):
-		"""
-		Return the number of files in the FAST5 set.
-		"""
-		return self.num_files_in_set
-
-	def __iter__(self):
-		return self
-
-	def next(self):
-		try:
-			return Fast5File(self.files.next())
-		except Exception as e:
-			# cleanup our mess
-			if self.set_type ==	 FAST5SET_TARBALL:
-				shutil.rmtree(PORETOOOLS_TMPDIR)
-			raise StopIteration
-
-	def _extract_fast5_files(self):
-
-		# return as-is if list of files
-		if len(self.fileset) > 1:
-			self.files = iter(self.fileset)
-			self.num_files_in_set = len(self.fileset)
-			self.set_type = FAST5SET_FILELIST
-		elif len(self.fileset) == 1:
-			# e.g. ['/path/to/dir'] or ['/path/to/file']
-			f = self.fileset[0]
-			# is it a directory?
-			if os.path.isdir(f):
-				pattern = f + '/' + '*.fast5'
-				files = glob.glob(pattern)
-				self.files = iter(files)
-				self.num_files_in_set = len(files)
-				self.set_type = FAST5SET_DIRECTORY
-				if not len(files):
-					logger.warning("Directory is empty!")
-
-			# is it a tarball?
-			elif tarfile.is_tarfile(f):
-				if os.path.isdir(PORETOOOLS_TMPDIR):
-					shutil.rmtree(PORETOOOLS_TMPDIR)
-				os.mkdir(PORETOOOLS_TMPDIR)
-				
-				tar = tarfile.open(f)
-				tar.extractall(PORETOOOLS_TMPDIR)
-				self.files = (PORETOOOLS_TMPDIR + '/' + f for f in tar.getnames())
-				self.num_files_in_set = len(tar.getnames())
-				self.set_type = FAST5SET_TARBALL
-
-			# just a single FAST5 file.
-			else:
-				self.files = iter([f])
-				self.num_files_in_set = 1
-				self.set_type = FAST5SET_SINGLEFILE
-		else:
-			logger.error("Directory %s could not be opened. Exiting.\n" % dir)
-			sys.exit()
-
-
-class Fast5File(object):
-
-	def __init__(self, filename):
-		self.filename = filename
-		self.is_open = self.open()
-
-		self.fastas = {}
-		self.fastqs = {}
-		
-		# pre-load the FASTQ data
-		#self._extract_fastqs_from_fast5()
-
-		# booleans for lazy loading (speed)
-		self.have_fastqs = False
-		self.have_fastas = False
-		self.have_templates = False
-		self.have_complements = False
-		self.have_metadata = False
-
-	####################################################################
-	# Public API methods
-	####################################################################
-
-	def open(self):
-		"""
-		Open an ONT Fast5 file, assuming HDF5 format
-		"""
-		try:
-			self.hdf5file = pyhdf5.open_file(self.filename, 'r')
-			return True
-		except Exception, e:
-			logger.warning("Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
-			return False
-			
-	def close(self):
-		"""
-		Close an open an ONT Fast5 file, assuming HDF5 format
-		"""
-		if self.is_open:
-			self.hdf5file.close()
-
-
-	def get_fastqs(self, choice):
-		"""
-		Return the set of base called sequences in the FAST5
-		in FASTQ format.
-		"""
-		if self.have_fastqs is False:
-			self._extract_fastqs_from_fast5()
-			self.have_fastqs = True
-
-		# TODO "best". What is "best"?
-		fqs = []
-		if choice == "all":
-			for fastq in self.fastqs:
-				fqs.append(self.fastqs[fastq])
-		elif choice == "fwd":
-				fqs.append(self.fastqs.get('template'))
-		elif choice == "rev":
-				fqs.append(self.fastqs.get('complement'))
-		elif choice == "2D":
-				fqs.append(self.fastqs.get('twodirections'))
-		elif choice == "fwd,rev":
-				fqs.append(self.fastqs.get('template'))
-				fqs.append(self.fastqs.get('complement'))
-
-		return fqs
-
-
-	def get_fastas(self, choice):
-		"""
-		Return the set of base called sequences in the FAST5
-		in FASTQ format.
-		"""
-		if self.have_fastas is False:
-			self._extract_fastas_from_fast5()
-			self.have_fastas = True
-
-		# TODO "best". What is "best"?
-		fas = []
-		if choice == "all":
-			for fasta in self.fastas:
-				fas.append(self.fastas[fasta])
-		elif choice == "fwd":
-				fas.append(self.fastas.get('template'))
-		elif choice == "rev":
-				fas.append(self.fastas.get('complement'))
-		elif choice == "2D":
-				fas.append(self.fastas.get('twodirections'))
-		elif choice == "fwd,rev":
-				fas.append(self.fastas.get('template'))
-				fas.append(self.fastas.get('complement'))
-
-		return fas
-
-
-	def get_fastq(self):
-		"""
-		Return the base called sequence in the FAST5
-		in FASTQ format. Try 2D then template, then complement.
-		If all fail, return None
-		"""
-		if self.have_fastqs is False:
-			self._extract_fastqs_from_fast5()
-			self.have_fastqs = True
-
-		if not self.fastqs:
-			return None
-		elif self.fastqs.get('twodirections') is not None:
-			return self.fastqs.get('twodirections')
-		elif self.fastqs.get('template') is not None:
-			return self.fastqs.get('template')
-		elif self.fastqs.get('complement') is not None:
-			return self.fastqs.get('complement')
-
-
-	def get_fasta(self):
-		"""
-		Return the base called sequence in the FAST5
-		in FASTA format. Try 2D then template, then complement.
-		If all fail, return None
-		"""
-		if not self.fastas:
-			return None
-		elif self.fastas.get('twodirections') is not None:
-			return self.fastas.get('twodirections')
-		elif self.fastas.get('template') is not None:
-			return self.fastas.get('template')
-		elif self.fastas.get('complement') is not None:
-			return self.fastas.get('complement')
-
-	def get_template_events(self):
-		"""
-		Return the table of event data for the template strand
-		"""
-		if self.have_templates is False:
-			self._extract_template_events()
-			self.have_templates = True
-
-		return self.template_events
-
-	def get_complement_events(self):
-		"""
-		Return the table of event data for the complement strand
-		"""
-		if self.have_complements is False:
-			self._extract_complement_events()
-			self.have_complements = True
-		
-		return self.complement_events
-
-	####################################################################
-	# Flowcell Metadata methods
-	####################################################################
-
-	def get_exp_start_time(self):
-		"""
-		Return the starting time at which signals were collected
-		for the given read.
-		"""
-		if self.have_metadata is False:
-			self._get_metadata()
-			self.have_metadata = True
-
-		try:
-			return self.keyinfo.tracking_id._f_getAttr('exp_start_time')
-		except:
-			return None
-
-	def get_channel_number(self):
-		"""
-		Return the channel (pore) number at which signals were collected
-		for the given read.
-		"""
-		if self.have_metadata is False:
-			self._get_metadata()
-			self.have_metadata = True
-
-		try:
-			return self.keyinfo.channel_id._f_getAttr('channel_number')
-		except:
-			return None
-
-	def find_read_number_block(self):
-		path = "/Analyses/Basecall_2D_000/InputEvents"
-		try:
-			newpath = self.hdf5file.getNode(path)
-
-			# the soft link target seems broken?
-			newpath = "/" + "/".join(newpath.target.split("/")[:-1])
-# + '/Events'
-
-			node = self.hdf5file.getNode(newpath)
-
-			return node
-		except Exception:
-			pass
-
-	def find_event_timing_block(self):
-		path = "/Analyses/Basecall_2D_000/BaseCalled_template/Events"
-		try:
-			return self.hdf5file.getNode(path)
-		except Exception:
-			pass
-		
-		return None
-
-	def get_read_number(self):
-		"""
-		Return the read number for the pore representing the given read.
-		"""
-		node = self.find_read_number_block()
-		if node:
-			try:
-				return node._f_getAttr('read_number')
-			except:
-				return None
-		return None
-
-	def get_duration(self):
-		node = self.find_event_timing_block()
-		if node:
-			return int(node._f_getAttr('duration'))
-		return None
-
-	def get_start_time(self):
-		exp_start_time	= self.get_exp_start_time()
-	
-		node = self.find_event_timing_block()
-		if node:
-			return int(exp_start_time) + int(node._f_getAttr('start_time'))
-	
-		return None
-
-	def get_end_time(self):
-		exp_start_time	= self.get_exp_start_time()
-		start_time = self.get_start_time()
-		duration = self.get_duration()
-
-		if start_time and duration:
-			return start_time + duration
-		else:
-			return None
-
-	def get_version_name(self):
-		"""
-		Return the flow cell version name.
-		"""
-		if self.have_metadata is False:
-			self._get_metadata()
-			self.have_metadata = True
-
-		try:
-			return self.keyinfo.tracking_id._f_getAttr('version_name')
-		except:
-			return None
-
-	def get_run_id(self):
-		"""
-		Return the run id.
-		"""
-		if self.have_metadata is False:
-			self._get_metadata()
-			self.have_metadata = True
-
-		try:
-			return self.keyinfo.tracking_id._f_getAttr('run_id')
-		except:
-			return None
-
-	def get_heatsink_temp(self):
-		"""
-		Return the heatsink temperature.
-		"""
-		if self.have_metadata is False:
-			self._get_metadata()
-			self.have_metadata = True
-
-		try:
-			return self.keyinfo.tracking_id._f_getAttr('heatsink_temp')
-		except:
-			return None
-
-	def get_asic_temp(self):
-		"""
-		Return the ASIC temperature.
-		"""
-		if self.have_metadata is False:
-			self._get_metadata()
-			self.have_metadata = True
-
-		try:
-			return self.keyinfo.tracking_id._f_getAttr('asic_temp')
-		except:
-			return None
-
-	def get_flowcell_id(self):
-		"""
-		Return the flowcell_id.
-		"""
-		if self.have_metadata is False:
-			self._get_metadata()
-			self.have_metadata = True
-
-		try:
-			return self.keyinfo.tracking_id._f_getAttr('flowcell_id')
-		except:
-			return None
-
-	def get_run_purpose(self):
-		"""
-		Return the exp_script_purpose.
-		"""
-		if self.have_metadata is False:
-			self._get_metadata()
-			self.have_metadata = True
-
-		try:
-			return self.keyinfo.tracking_id._f_getAttr('exp_script_purpose')
-		except:
-			return None
-
-	def get_asic_id(self):
-		"""
-		Return the flowcell's ASIC id.
-		"""
-		if self.have_metadata is False:
-			self._get_metadata()
-			self.have_metadata = True
-
-		try:
-			return self.keyinfo.tracking_id._f_getAttr('asic_id')
-		except:
-			return None
-
-		if self.have_metadata is False:
-			self._get_metadata()
-			self.have_metadata = True
-
-	def get_device_id(self):
-		"""
-		Return the flowcell's device id.
-		"""
-		try:
-			return self.keyinfo.tracking_id._f_getAttr('device_id')
-		except:
-			return None
-
-	####################################################################
-	# Private API methods
-	####################################################################
-
-	def _extract_fastqs_from_fast5(self):
-		"""
-		Return the sequence in the FAST5 file in FASTQ format
-		"""
-		for id, h5path in fastq_paths.iteritems(): 
-			try:
-				table = self.hdf5file.getNode(h5path)
-				fq = formats.Fastq(table.Fastq[()])
-				fq.name += "_" + id + ":" + self.filename
-				self.fastqs[id] = fq
-			except Exception, e:
-				pass
-
-	def _extract_fastas_from_fast5(self):
-		"""
-		Return the sequence in the FAST5 file in FASTA format
-		"""
-		for id, h5path in fastq_paths.iteritems(): 
-			try:
-				table = self.hdf5file.getNode(h5path)
-				fa = formats.Fasta(table.Fastq[()])
-				fa.name += "_" + id + " " + self.filename
-				self.fastas[id] = fa
-			except Exception, e:
-				pass
-
-	def _extract_template_events(self):
-		"""
-		Pull out the event information for the template strand
-		"""
-		try:
-			table = self.hdf5file.getNode(fastq_paths['template'])
-			self.template_events = [Event(x) for x in table.Events]
-		except Exception, e:
-			self.template_events = []
-
-	def _extract_complement_events(self):
-		"""
-		Pull out the event information for the complementary strand
-		"""
-		try:
-			table = self.hdf5file.getNode(fastq_paths['complement'])
-			self.complement_events = [Event(x) for x in table.Events]
-		except Exception, e:
-			self.complement_events = []
-
-	def _get_metadata(self):
-		try:
-			self.keyinfo = self.hdf5file.getNode('/UniqueGlobalKey')
-		except Exception, e:
-			try:
-				self.keyinfo = self.hdf5file.getNode('/Key')
-			except Exception, e:
-				self.keyinfo = None
-				logger.warning("Cannot find keyinfo. Exiting.\n")
diff --git a/poretools/events.py b/poretools/events.py
index 3391f3e..c0315ec 100644
--- a/poretools/events.py
+++ b/poretools/events.py
@@ -8,13 +8,17 @@ def run(parser, args):
 			'p_model_state', 'mp_model_state', 'p_mp_model_state', \
 			'p_A', 'p_C', 'p_G', 'p_T', 'raw_index']
 	print "\t".join(keys)
-	
-	for fast5 in Fast5File.Fast5FileSet(args.files):
 
-		for event in fast5.get_template_events():
-			print '\t'.join([fast5.filename, 'template', str(event)]) 
-		for event in fast5.get_complement_events():
-			print '\t'.join([fast5.filename, 'complement', str(event)]) 
+	if args.pre_basecalled:
+		for fast5 in Fast5File.Fast5FileSet(args.files):
+			for event in fast5.get_pre_basecalled_events(): 
+				print '\t'.join([fast5.filename, 'pre_basecalled', str(event)])
+	else:
+		for fast5 in Fast5File.Fast5FileSet(args.files):
+			for event in fast5.get_template_events():
+				print '\t'.join([fast5.filename, 'template', str(event)]) 
+			for event in fast5.get_complement_events():
+				print '\t'.join([fast5.filename, 'complement', str(event)]) 
 
 		fast5.close()
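For context, the per-read event tables iterated above are ordinary HDF5 datasets, so the same data can be read directly with h5py. A rough sketch, with an invented filename and the 'classic' template path from Fast5File.py:

.. code-block:: python

    # Reading an event table directly with h5py (invented filename; 'classic' layout path).
    import h5py

    with h5py.File('example_read.fast5', 'r') as f5:
        events = f5['/Analyses/Basecall_2D_000/BaseCalled_template/Events'][()]
        for event in events[:5]:
            print(event)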
 
diff --git a/poretools/fasta.py b/poretools/fasta.py
index a442a23..c65b58f 100644
--- a/poretools/fasta.py
+++ b/poretools/fasta.py
@@ -3,7 +3,7 @@ import sys
 
 def run(parser, args):
 
-	for fast5 in Fast5File.Fast5FileSet(args.files):
+	for fast5 in Fast5File.Fast5FileSet(args.files, args.group):
 
 		if args.start_time or args.end_time:
 			read_start_time = fast5.get_start_time()
@@ -37,8 +37,10 @@ def run(parser, args):
 
 		for fa in fas:
 			if fa is None or \
-			len(fa.seq) < args.min_length:			
-				continue
+			len(fa.seq) < args.min_length or \
+			(len(fa.seq) > args.max_length and \
+			args.max_length > 0):			
+				continue			
 
 			print fa
 
diff --git a/poretools/fastq.py b/poretools/fastq.py
index 3f3be43..314d264 100644
--- a/poretools/fastq.py
+++ b/poretools/fastq.py
@@ -3,7 +3,7 @@ import sys
 
 def run(parser, args):
 	
-	for fast5 in Fast5File.Fast5FileSet(args.files):
+	for fast5 in Fast5File.Fast5FileSet(args.files, args.group):
 
 		if args.start_time or args.end_time:
 			read_start_time = fast5.get_start_time()
@@ -16,15 +16,30 @@ def run(parser, args):
 				continue
 
 		fas = fast5.get_fastqs(args.type)
+
+		# "high quality" 2D: more nanopore events on the complement strand
+		# than on the template strand. We also require a 2D base-called
+		# sequence from Metrichor.
 		if args.high_quality:
-			if fast5.get_complement_events_count() <= \
-			   fast5.get_template_events_count():
+			if (fast5.get_complement_events_count() <= \
+			   fast5.get_template_events_count()) or not fast5.has_2D():
+				fast5.close()
+				continue
+
+		# "normal quality" 2D: no more nanopore events on the complement
+		# strand than on the template strand. We also require a 2D
+		# base-called sequence from Metrichor.
+		if args.normal_quality:
+			if (fast5.get_complement_events_count() > \
+			   fast5.get_template_events_count()) or not fast5.has_2D():
 				fast5.close()
 				continue
 
 		for fa in fas:
 			if fa is None or \
-			len(fa.seq) < args.min_length:			
+			len(fa.seq) < args.min_length or \
+			(len(fa.seq) > args.max_length and \
+			args.max_length > 0):			
 				continue
 
 			print fa
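The two skip conditions above read more easily as positive predicates. A sketch of hypothetical helpers, written against any object exposing the three Fast5File methods used here (they are not part of the upstream module):

.. code-block:: python

    # Hypothetical predicates equivalent to the --high-quality / --normal-quality filters.
    def is_high_quality_2d(f5):
        """More complement events than template events, and a 2D basecall exists."""
        return (f5.get_complement_events_count() > f5.get_template_events_count()
                and f5.has_2D())

    def is_normal_quality_2d(f5):
        """No more complement events than template events, and a 2D basecall exists."""
        return (f5.get_complement_events_count() <= f5.get_template_events_count()
                and f5.has_2D())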
diff --git a/poretools/formats.py b/poretools/formats.py
index 77b5a5c..29bc7b5 100644
--- a/poretools/formats.py
+++ b/poretools/formats.py
@@ -9,6 +9,20 @@ class Fastq(object):
 	def __repr__(self):
 		return '\n'.join([self.name, self.seq, self.sep, self.qual])
 
+	def est_error_rate(self):
+		"""
+		Returns an error rate estimate using the Phred quality scores.
+		"""
+		try:
+			error_count = 0.0
+			for score in self.qual:
+				phred = ord(score) - 33
+				error_count += 10.0 ** (-phred / 10.0)
+			return error_count / len(self.qual)
+		except Exception, e:
+			return 0.0
+
+
 
 class Fasta(object):
 	def __init__(self, s):
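The new ``est_error_rate()`` converts each quality character to an error probability via the Phred relation ``P = 10 ** (-Q / 10)`` with ``Q = ord(char) - 33``, then averages over the read. A standalone check of that arithmetic with a made-up quality string:

.. code-block:: python

    # Standalone check of the Phred-to-error-probability arithmetic used by est_error_rate().
    def phred_char_to_error(ch):
        """Convert one Sanger-encoded (Phred+33) quality character to an error probability."""
        q = ord(ch) - 33
        return 10.0 ** (-q / 10.0)

    quals = "IIII####"   # made-up string: 'I' = Q40 (P = 0.0001), '#' = Q2 (P ~ 0.63)
    mean_error = sum(phred_char_to_error(c) for c in quals) / len(quals)
    print(mean_error)    # roughly 0.316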
diff --git a/poretools/hist.py b/poretools/hist.py
index eeaa020..b16c683 100644
--- a/poretools/hist.py
+++ b/poretools/hist.py
@@ -1,75 +1,44 @@
 import sys
+import time
+
+import matplotlib
+#matplotlib.use('Agg') # Must be called before any other matplotlib calls
+from matplotlib import pyplot as plt
+
+import seaborn as sns
 import Fast5File
-import rpy2.robjects as robjects
-import rpy2.robjects.lib.ggplot2 as ggplot2
-from rpy2.robjects.packages import importr
 
-#logging
 import logging
 logger = logging.getLogger('poretools')
 logger.setLevel(logging.INFO)
 
 def plot_hist(sizes, args):
-	"""
-	Use rpy2 to plot a histogram of the read sizes
-	"""
-	r = robjects.r
-	r.library("ggplot2")
-	grdevices = importr('grDevices')
-
-	sizes = robjects.IntVector([s for s in sizes \
-                if s < args.max_length and s > args.min_length])
-
-	sizes_min = min(sizes)
-	sizes_max = max(sizes)
-
-	binwidth = (sizes_max - sizes_min) / args.num_bins
+    """
+    plot a histogram of the read sizes
+    """
+    sizes = [s for s in sizes if args.min_length < s < args.max_length]
 
-	d = {'sizes' : sizes}
-	df = robjects.DataFrame(d)
+    if args.theme_bw:
+        sns.set_style("whitegrid")
+    plt.hist(sizes, args.num_bins)
+    plt.xlabel('sizes')
 
-        # plot
-        gp = ggplot2.ggplot(df)
-
-        if not args.theme_bw:
-            pp = gp + ggplot2.aes_string(x='sizes') \
-	                + ggplot2.geom_histogram(binwidth=binwidth)
-        else:
-            pp = gp + ggplot2.aes_string(x='sizes') \
-                + ggplot2.geom_histogram(binwidth=binwidth) \
-                + ggplot2.theme_bw()	    	
-
-	if args.saveas is not None:
-		plot_file = args.saveas
-		if plot_file.endswith(".pdf"):
-			grdevices.pdf(plot_file, width = 8.5, height = 8.5)
-		elif plot_file.endswith(".png"):
-			grdevices.png(plot_file, width = 8.5, height = 8.5, 
-				units = "in", res = 300)
-		else:
-			logger.error("Unrecognized extension for %s!" % (plot_file))
-			sys.exit()
-
-		pp.plot()
-		grdevices.dev_off()
-	else:
-		pp.plot()
-		# keep the plot open until user hits enter
-		print('Type enter to exit.')
-		raw_input()
+    if args.saveas is not None:
+        plt.savefig(args.saveas)
+    else:
+        plt.show()
 
 def run(parser, args):
-	sizes = []
-	files_processed = 0
-	for fast5 in Fast5File.Fast5FileSet(args.files):
-		fq = fast5.get_fastq()
-		if fq is not None:
-			sizes.append(len(fq.seq))
-		files_processed += 1
-		if files_processed % 100 == 0:
-			logger.info("%d files processed." % files_processed)
-		fast5.close()
-
-	plot_hist(sizes, args)
-
+    sizes = []
+    files_processed = 0
+
+    for fast5 in Fast5File.Fast5FileSet(args.files):
+        fq = fast5.get_fastq()
+        if fq is not None:
+            sizes.append(len(fq.seq))
+        files_processed += 1
+        if files_processed % 100 == 0:
+            logger.info("%d files processed." % files_processed)
+        fast5.close()
+    plot_hist(sizes, args)
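With the rpy2/ggplot2 path removed, the histogram is plain matplotlib. A minimal standalone sketch of the same plotting calls (the read lengths are made-up, and the Agg backend is forced only so the sketch also runs headless):

.. code-block:: python

    # Minimal standalone sketch of the histogram path (made-up read lengths).
    import matplotlib
    matplotlib.use('Agg')   # headless backend; an assumption for this sketch only
    from matplotlib import pyplot as plt

    sizes = [812, 1450, 2300, 980, 5100, 760, 3300]
    min_length, max_length, num_bins = 0, 10000, 5

    sizes = [s for s in sizes if min_length < s < max_length]
    plt.hist(sizes, num_bins)
    plt.xlabel('sizes')
    plt.savefig('hist.png')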
 
diff --git a/poretools/index.py b/poretools/index.py
new file mode 100644
index 0000000..abc1af0
--- /dev/null
+++ b/poretools/index.py
@@ -0,0 +1,66 @@
+import Fast5File
+import datetime
+
+############
+#
+#	index
+#
+#	A tool to extract all the info needed to identify a pile of
+#	unsorted reads from multiple MinION sequencing experiments.
+#
+############
+
+def run(parser, args):
+
+	print "source_filename\ttemplate_fwd_length\tcomplement_rev_length\t2d_length\tasic_id\tasic_temp\theatsink_temp\tchannel\texp_start_time\texp_start_time_string_date\texp_start_time_string_time\tstart_time\tstart_time_string_date\tstart_time_string_time\tduration\tfast5_version"
+
+	for fast5 in Fast5File.Fast5FileSet(args.files):
+		
+		
+		# run and flowcell parameters
+		asic_temp  = fast5.get_asic_temp()
+		asic_id = fast5.get_asic_id()
+		heatsink_temp = fast5.get_heatsink_temp()
+		channel_number = fast5.get_channel_number()
+		
+		# try and get timing info
+		try:
+			start_time = fast5.get_start_time()
+			start_time_string = datetime.datetime.fromtimestamp(float(start_time)).strftime("%Y-%b-%d (%a)\t%H:%M:%S")
+			exp_start_time = fast5.get_exp_start_time()
+			exp_start_time_string = datetime.datetime.fromtimestamp(float(exp_start_time)).strftime("%Y-%b-%d (%a)\t%H:%M:%S")
+			duration = fast5.get_duration()
+		except KeyError:	
+			start_time = "Not found"
+			start_time_string = "NA\tNA"
+			exp_start_time = "Not found"
+			exp_start_time_string = "NA\tNA"
+			duration = "Not found"
+		
+		# sequence file info
+		fast5_version = fast5.guess_version()
+		
+		# read info
+		fastq_reads = fast5.get_fastqs('all')
+		length_template = None
+		length_complement = None
+		length_2d = None
+		if (len(fastq_reads) > 0):
+			length_template = len(fastq_reads[0].seq)
+		if (len(fastq_reads) > 2):
+			length_complement = len(fastq_reads[1].seq)
+			length_2d = len(fastq_reads[2].seq)
+
+		print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
+			fast5.filename,
+			length_template,
+			length_complement,
+			length_2d,		
+			asic_id, asic_temp, heatsink_temp,channel_number,exp_start_time,exp_start_time_string,start_time,start_time_string,duration,fast5_version)
+
+		fast5.close()
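The date/time columns produced by ``index`` come from plain epoch-seconds formatting. For example, with an arbitrary epoch value (not taken from a real run):

.. code-block:: python

    # Formatting an epoch timestamp the way the index subcommand does (arbitrary value).
    import datetime

    exp_start_time = 1457049600   # arbitrary example; real values come from the FAST5 metadata
    stamp = datetime.datetime.fromtimestamp(float(exp_start_time))
    print(stamp.strftime("%Y-%b-%d (%a)\t%H:%M:%S"))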
diff --git a/poretools/metadata.py b/poretools/metadata.py
new file mode 100644
index 0000000..958a675
--- /dev/null
+++ b/poretools/metadata.py
@@ -0,0 +1,22 @@
+import Fast5File
+
+def run(parser, args):
+
+	if args.read:
+		for i, fast5 in enumerate(Fast5File.Fast5FileSet(args.files)):
+			for metadata_dict in fast5.read_metadata:
+				if i == 0:
+					header = metadata_dict.keys()
+					print "\t".join(["filename"] + header)
+				print "\t".join([fast5.filename] + [str( metadata_dict[k] ) for k in header])
+	else:
+		print "asic_id\tasic_temp\theatsink_temp"
+		for fast5 in Fast5File.Fast5FileSet(args.files):
+
+			asic_temp  = fast5.get_asic_temp()
+			asic_id = fast5.get_asic_id()
+			heatsink_temp = fast5.get_heatsink_temp()
+
+			print "%s\t%s\t%s" % (asic_id, asic_temp, heatsink_temp)
+
+			fast5.close()
diff --git a/poretools/occupancy.py b/poretools/occupancy.py
index d944c24..532d6c2 100644
--- a/poretools/occupancy.py
+++ b/poretools/occupancy.py
@@ -1,169 +1,84 @@
 import Fast5File
-from time import strftime, localtime
-from collections import defaultdict, Counter
-import rpy2.robjects.lib.ggplot2 as gg
-import rpy2.robjects as robjects
-from rpy2.robjects.packages import importr
+from collections import Counter
 import sys
-import string
-import random
+import pandas as pd
+import seaborn as sns
+from matplotlib import pyplot as plt
 
-#logging
 import logging
 logger = logging.getLogger('poretools')
 
 def minion_flowcell_layout():
-	seeds = [125,121,117,113,109,105,101,97,
-	         93,89,85,81,77,73,69,65,
-	         61,57,53,49,45,41,37,33,
-	         29,25,21,17,13,9,5,1]
-	
-	flowcell_layout = []
-	for s in seeds:
-		for block in range(4):
-			for row in range(4):
-				flowcell_layout.append(s + 128*block + row)
-	return flowcell_layout
-
-def plot_read_count(parser, args, tot_reads_per_pore):
-	"""
-	Plot the pore performance
-	"""
-	r = robjects.r
-	r.library("ggplot2")
-	grdevices = importr('grDevices')
-
-	flowcell_layout = minion_flowcell_layout()
-
-	pore_values = []
-	for pore in flowcell_layout:
-		if pore in tot_reads_per_pore:
-			pore_values.append(tot_reads_per_pore[pore])
-		else:
-			pore_values.append(0)		
-	
-	# make a data frame of the lists
-	d = {'rownum': robjects.IntVector(range(1,17)*32),
-		 'colnum': robjects.IntVector(sorted(range(1,33)*16)),
-		 'tot_reads': robjects.IntVector(pore_values),
-		 'labels': robjects.IntVector(flowcell_layout)
-		 }
-
-	df = robjects.DataFrame(d)
-	gp = gg.ggplot(df)
-	pp = gp + gg.aes_string(y = 'factor(rownum, rev(rownum))', \
-		                         x = 'factor(colnum)') \
-            + gg.geom_point(gg.aes_string(color='tot_reads'), size = 7) \
-            + gg.geom_text(gg.aes_string(label ='labels'), colour="white", size = 2) \
-            + gg.scale_colour_gradient2(low = "black", mid= "black", high="red") \
-            + gg.coord_fixed(ratio=1.4) \
-            + gg.labs(x=gg.NULL, y=gg.NULL)
-
-	if args.saveas is not None:
-		plot_file = args.saveas
-		if plot_file.endswith(".pdf"):
-			grdevices.pdf(plot_file, width = 11, height = 8.5)
-		elif plot_file.endswith(".png"):
-			grdevices.png(plot_file, width = 11, height = 8.5, 
-				units = "in", res = 300)
-		else:
-			logger.error("Unrecognized extension for %s!" % (plot_file))
-			sys.exit()
-
-		pp.plot()
-		grdevices.dev_off()
-	else:
-		pp.plot()
-		# keep the plot open until user hits enter
-		print('Type enter to exit.')
-		raw_input()
-
-
-def plot_total_bp(parser, args, tot_bp_per_pore):
-	"""
-	Plot the pore performance
-	"""
-	import math
-	r = robjects.r
-	r.library("ggplot2")
-	grdevices = importr('grDevices')
-
-	flowcell_layout = minion_flowcell_layout()
-
-	pore_values = []
-	for pore in flowcell_layout:
-		if pore in tot_bp_per_pore:
-			pore_values.append(math.log10(tot_bp_per_pore[pore]))
-		else:
-			pore_values.append(0)		
-	
-	# make a data frame of the lists
-	d = {'rownum': robjects.IntVector(range(1,17)*32),
-		 'colnum': robjects.IntVector(sorted(range(1,33)*16)),
-		 'log10_tot_bp': robjects.IntVector(pore_values),
-		 'labels': robjects.IntVector(flowcell_layout)
-		 }
-
-	df = robjects.DataFrame(d)
-	gp = gg.ggplot(df)
-	pp = gp + gg.aes_string(y = 'factor(rownum, rev(rownum))', \
-		                         x = 'factor(colnum)') \
-            + gg.geom_point(gg.aes_string(color='log10_tot_bp'), size = 7) \
-            + gg.geom_text(gg.aes_string(label ='labels'), colour="white", size = 2) \
-            + gg.scale_colour_gradient2(low = "black", mid= "black", high="red") \
-            + gg.coord_fixed(ratio=1.4) \
-            + gg.labs(x=gg.NULL, y=gg.NULL)
-
-	if args.saveas is not None:
-		plot_file = args.saveas
-		if plot_file.endswith(".pdf"):
-			grdevices.pdf(plot_file, width = 11, height = 8.5)
-		elif plot_file.endswith(".png"):
-			grdevices.png(plot_file, width = 11, height = 8.5, 
-				units = "in", res = 300)
-		else:
-			logger.error("Unrecognized extension for %s!" % (plot_file))
-			sys.exit()
-
-		pp.plot()
-		grdevices.dev_off()
-	else:
-		pp.plot()
-		# keep the plot open until user hits enter
-		print('Type enter to exit.')
-		raw_input()
-
+    seeds = [125, 121, 117, 113, 109, 105, 101, 97,
+             93, 89, 85, 81, 77, 73, 69, 65,
+             61, 57, 53, 49, 45, 41, 37, 33,
+             29, 25, 21, 17, 13, 9, 5, 1]
+
+    flowcell_layout = []
+    for s in seeds:
+        for block in range(4):
+            for row in range(4):
+                flowcell_layout.append(s + 128*block + row)
+    return flowcell_layout
+
+def plot_performance(parser, args, pore_measure):
+    """
+    Plot the pore performance in terms of reads per pore
+    """
+    flowcell_layout = minion_flowcell_layout()
+
+    pore_values = []
+    for pore in flowcell_layout:
+        if pore in pore_measure:
+            pore_values.append(pore_measure[pore])
+        else:
+            pore_values.append(0)
+
+    # make a data frame of the lists
+    d = {'rownum': range(1,17)*32,
+        'colnum': sorted(range(1,33)*16),
+        'tot_reads': pore_values,
+        'labels': flowcell_layout}
+    df = pd.DataFrame(d)
+
+    d = df.pivot("rownum", "colnum", "tot_reads")
+    sns.heatmap(d, annot=True, fmt="d", linewidths=.5)
+
+    if args.saveas is not None:
+        plot_file = args.saveas
+        plt.savefig(plot_file, figsize=(8.5, 8.5))
+    else:
+        plt.show()
 
 def run(parser, args):
 
-	tot_reads_per_pore = Counter()
-	tot_bp_per_pore = Counter()
-
-	print "\t".join(['channel_number', 'start_time', 'duration'])
-	for fast5 in Fast5File.Fast5FileSet(args.files):
-		if fast5.is_open:
-			fq = fast5.get_fastq()
-			
-			start_time = fast5.get_start_time()
-			if start_time is None:
-				logger.warning("No start time for %s!" % (fast5.filename))
-				fast5.close()
-				continue
-
-			pore_id = fast5.get_channel_number()
-			tot_reads_per_pore[int(pore_id)] += 1
-			tot_bp_per_pore[int(pore_id)] += len(fq.seq)
-
-			lt = localtime(start_time)
-			print "\t".join([
-				str(pore_id),
-				str(start_time),
-				str(fast5.get_duration())])
-			fast5.close()
-
-	if args.plot_type == 'read_count':
-		plot_read_count(parser, args, tot_reads_per_pore)
-	elif args.plot_type == 'total_bp':
-		plot_total_bp(parser, args, tot_bp_per_pore)
+    tot_reads_per_pore = Counter()
+    tot_bp_per_pore = Counter()
+
+    print "\t".join(['channel_number', 'start_time', 'duration'])
+    for fast5 in Fast5File.Fast5FileSet(args.files):
+        if fast5.is_open:
+            fq = fast5.get_fastq()
+
+            start_time = fast5.get_start_time()
+            if start_time is None:
+                logger.warning("No start time for %s!" % (fast5.filename))
+                fast5.close()
+                continue
+
+            pore_id = fast5.get_channel_number()
+            tot_reads_per_pore[int(pore_id)] += 1
+            tot_bp_per_pore[int(pore_id)] += len(fq.seq)
+
+            print "\t".join([
+                str(pore_id),
+                str(start_time),
+                str(fast5.get_duration())])
+            fast5.close()
+
+    if args.plot_type == 'read_count':
+        plot_performance(parser, args, tot_reads_per_pore)
+    elif args.plot_type == 'total_bp':
+        plot_performance(parser, args, tot_bp_per_pore)
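The refactored plotting code builds the 16x32 flowcell grid with a pandas pivot and hands it to seaborn. The same pivot-plus-heatmap step on a toy 2x3 grid with invented per-pore counts looks like this:

.. code-block:: python

    # Toy-sized version of the pivot + heatmap step in plot_performance() (invented counts).
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt

    d = {'rownum': [1, 1, 1, 2, 2, 2],
         'colnum': [1, 2, 3, 1, 2, 3],
         'tot_reads': [4, 0, 7, 2, 9, 1]}
    df = pd.DataFrame(d)

    grid = df.pivot(index="rownum", columns="colnum", values="tot_reads")
    sns.heatmap(grid, annot=True, fmt="d", linewidths=.5)
    plt.savefig('occupancy_toy.png')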
 
 
diff --git a/poretools/organise.py b/poretools/organise.py
new file mode 100644
index 0000000..7b3e578
--- /dev/null
+++ b/poretools/organise.py
@@ -0,0 +1,39 @@
+import Fast5File
+import sys
+import os
+from os import makedirs
+import os.path
+import shutil
+
+#logging
+import logging
+logger = logging.getLogger('poretools')
+logger.setLevel(logging.INFO)
+
+def run(parser, args):
+	if not os.path.isdir(args.dest):
+		logger.error('destination directory needs to exist')
+		return
+
+	for fast5 in Fast5File.Fast5FileSet(args.files):
+
+		#offset = fast5.get_start_time() - fast5.get_exp_start_time()
+
+		specific_id = fast5.get_sample_name()
+		if not specific_id:
+			specific_id = fast5.get_asic_id()
+
+		path = "%s/%s" % (args.dest, specific_id)
+		if not os.path.isdir(path):
+			makedirs(path)
+
+		#fas = fast5.get_fastas(args.type)
+
+		fast5.close()
+
+		filename = os.path.split(fast5.filename)[1]
+		if args.copy:
+			shutil.copyfile(fast5.filename, path + '/' + filename)
+		else:
+			shutil.move(fast5.filename, path + '/' + filename)
+
diff --git a/poretools/poretools_main.py b/poretools/poretools_main.py
index d6d4c6e..22d0aa9 100755
--- a/poretools/poretools_main.py
+++ b/poretools/poretools_main.py
@@ -22,12 +22,16 @@ def run_subtool(parser, args):
         import fastq as submodule
     elif args.command == 'hist':
         import hist as submodule
+    elif args.command == 'metadata':
+        import metadata as submodule
     elif args.command == 'nucdist':
         import nucdist as submodule
     elif args.command == 'occupancy':
         import occupancy as submodule
     elif args.command == 'qualdist':
         import qualdist as submodule
+    elif args.command == 'qualpos':
+        import qual_v_pos as submodule
     elif args.command == 'readstats':
         import readstats as submodule
     elif args.command == 'stats':
@@ -42,6 +46,10 @@ def run_subtool(parser, args):
         import winner as submodule
     elif args.command == 'yield_plot':
         import yield_plot as submodule
+    elif args.command == 'index':
+        import index as submodule
+    elif args.command == 'organise':
+        import organise as submodule
 
     # run the chosen submodule.
     submodule.run(parser, args)
@@ -94,9 +102,9 @@ def main():
     parser_fastq.add_argument('--type',
                               dest='type',
                               metavar='STRING',
-                              choices=['all', 'fwd', 'rev', '2D', 'fwd,rev'],
+                              choices=['all', 'fwd', 'rev', '2D', 'fwd,rev', 'best'],
                               default='all',
-                              help='Which type of FASTA entries should be reported? Def.=all')
+                              help='Which type of FASTQ entries should be reported? Def.=all')
     parser_fastq.add_argument('--start',
                               dest='start_time',
                               default=None,
@@ -111,12 +119,27 @@ def main():
                               dest='min_length',
                               default=0,
                               type=int,
-                              help=('Minimum read length for FASTA entry to be reported.'))
+                              help=('Minimum read length for FASTQ entry to be reported.'))
+    parser_fastq.add_argument('--max-length',
+                              dest='max_length',
+                              default=-1,
+                              type=int,
+                              help=('Maximum read length for FASTQ entry to be reported.'))                          
     parser_fastq.add_argument('--high-quality',
                               dest='high_quality',
                               default=False,
                               action='store_true',
-                              help=('Only report reads with more complement events than template.'))
+                              help=('Only report reads with more complement events than template.'))   
+    parser_fastq.add_argument('--normal-quality',
+                              dest='normal_quality',
+                              default=False,
+                              action='store_true',
+                              help=('Only report reads with fewer complement events than template.'))
+    parser_fastq.add_argument('--group',
+                              dest='group',
+                              default=0,
+                              type=int,
+                              help=('Base calling group serial number to extract, default 000'))
     parser_fastq.set_defaults(func=run_subtool)
 
 
@@ -130,7 +153,7 @@ def main():
     parser_fasta.add_argument('--type',
                               dest='type',
                               metavar='STRING',
-                              choices=['all', 'fwd', 'rev', '2D', 'fwd,rev'],
+                              choices=['all', 'fwd', 'rev', '2D', 'fwd,rev', 'best'],
                               default='all',
                               help='Which type of FASTQ entries should be reported? Def.=all')
     parser_fasta.add_argument('--start',
@@ -148,6 +171,11 @@ def main():
                               default=0,
                               type=int,
                               help=('Minimum read length for FASTA entry to be reported.'))
+    parser_fasta.add_argument('--max-length',
+                              dest='max_length',
+                              default=-1,
+                              type=int,
+                              help=('Maximum read length for FASTA entry to be reported.'))                          
     parser_fasta.add_argument('--high-quality',
                               dest='high_quality',
                               default=False,
@@ -158,6 +186,11 @@ def main():
                               default=False,
                               action='store_true',
                               help=('Only report reads with fewer complement events than template.'))
+    parser_fasta.add_argument('--group',
+                              dest='group',
+                              default=0,
+                              type=int,
+                              help=('Base calling group serial number to extract, default 000'))
     parser_fasta.set_defaults(func=run_subtool)
 
 
@@ -171,7 +204,7 @@ def main():
     parser_stats.add_argument('--type',
                               dest='type',
                               metavar='STRING',
-                              choices=['all', 'fwd', 'rev', '2D', 'fwd,rev'],
+                              choices=['all', 'fwd', 'rev', '2D', 'fwd,rev', 'best'],
                               default='all',
                               help='Which type of FASTQ entries should be reported? Def.=all')
     parser_stats.add_argument('--full-tsv',
@@ -213,8 +246,12 @@ def main():
                              dest='theme_bw',
                              default=False,
                              action='store_true',
-                             help="Use the ggplot2 black and white theme.")
-
+                             help="Use a black and white theme.")
+    parser_hist.add_argument('--watch',
+                             dest='watch',
+                             default=False,
+                             action='store_true',
+                             help="Monitor a directory.")
     parser_hist.set_defaults(func=run_subtool)
 
 
@@ -225,9 +262,14 @@ def main():
                                         help='Extract each nanopore event for each read.')
     parser_events.add_argument('files', metavar='FILES', nargs='+',
                              help='The input FAST5 files.')
+    parser_events.add_argument('--pre-basecalled',
+                              dest='pre_basecalled',
+                              default=False,
+                              action='store_true',
+                              help=('Report pre-basecalled events'))     
     parser_events.set_defaults(func=run_subtool)
 
-    
+
     ###########
     # readstats
     ###########
@@ -248,12 +290,11 @@ def main():
     parser_tabular.add_argument('--type',
                               dest='type',
                               metavar='STRING',
-                              choices=['all', 'fwd', 'rev', '2D', 'fwd,rev'],
+                              choices=['all', 'fwd', 'rev', '2D', 'fwd,rev', 'best'],
                               default='all',
                               help='Which type of FASTA entries should be reported? Def.=all')
     parser_tabular.set_defaults(func=run_subtool)
 
-    
     #########
     # nucdist
     #########
@@ -263,6 +304,29 @@ def main():
                              help='The input FAST5 files.')
     parser_nucdist.set_defaults(func=run_subtool)
 
+    #########
+    # metadata
+    #########
+    parser_metadata = subparsers.add_parser('metadata',
+                                        help='Return run metadata such as ASIC ID and temperature from a set of FAST5 files')
+    parser_metadata.add_argument('files', metavar='FILES', nargs='+',
+                             help='The input FAST5 files.')
+    parser_metadata.add_argument('--read',
+                              dest='read',
+                              default=False,
+                              action='store_true',
+                              help=('Report read level metadata'))      
+    parser_metadata.set_defaults(func=run_subtool)
+    
+    #########
+    # index
+    #########
+    parser_index = subparsers.add_parser('index',
+                                        help='Tabulate all file location info and metadata such as ASIC ID and temperature from a set of FAST5 files')
+    parser_index.add_argument('files', metavar='FILES', nargs='+',
+                             help='The input FAST5 files.')
+    parser_index.set_defaults(func=run_subtool)
+
     
     ##########
     # qualdist
@@ -274,6 +338,61 @@ def main():
     parser_qualdist.set_defaults(func=run_subtool)
 
 
+
+    ##########
+    # qual vs. position
+    ##########
+    parser_qualpos = subparsers.add_parser('qualpos',
+                                        help='Get the qual score distribution over positions in reads')
+    parser_qualpos.add_argument('files', metavar='FILES', nargs='+',
+                             help='The input FAST5 files.')
+    parser_qualpos.set_defaults(func=run_subtool)
+    parser_qualpos.add_argument('--min-length',
+                              dest='min_length',
+                              default=0,
+                              type=int,
+                              help=('Minimum read length to be included in analysis.'))
+    parser_qualpos.add_argument('--max-length',
+                              dest='max_length',
+                              default=1000000000,
+                              type=int,
+                              help=('Maximum read length to be included in analysis.'))
+    parser_qualpos.add_argument('--bin-width',
+                              dest='bin_width',
+                              default=1000,
+                              type=int,
+                              help=('The width of bins (default: 1000 bp).'))
+    parser_qualpos.add_argument('--type',
+                              dest='type',
+                              metavar='STRING',
+                              choices=['all', 'fwd', 'rev', '2D', 'fwd,rev', 'best'],
+                              default='all',
+                              help='Which type of reads should be analyzed? Def.=all, choices=[all, fwd, rev, 2D, fwd,rev, best]')
+    parser_qualpos.add_argument('--start',
+                              dest='start_time',
+                              default=None,
+                              type=int,
+                              help='Only analyze reads from after start timestamp')
+    parser_qualpos.add_argument('--end',
+                              dest='end_time',
+                              default=None,
+                              type=int,
+                              help='Only analyze reads from before end timestamp')
+    parser_qualpos.add_argument('--high-quality',
+                              dest='high_quality',
+                              default=False,
+                              action='store_true',
+                              help='Only analyze reads with more complement events than template.')
+
+    parser_qualpos.add_argument('--saveas',
+                             dest='saveas',
+                             metavar='STRING',
+                             help='''Save the plot to a file named filename.extension (e.g. pdf, jpg)''',
+                             default=None)
+
+
+
+
     ##########
     # winner
     ##########
@@ -284,7 +403,7 @@ def main():
     parser_winner.add_argument('--type',
                               dest='type',
                               metavar='STRING',
-                              choices=['all', 'fwd', 'rev', '2D', 'fwd,rev'],
+                              choices=['all', 'fwd', 'rev', '2D', 'fwd,rev', 'best'],
                               default='all',
                               help='Which type of FASTA entries should be reported? Def.=all')
     parser_winner.set_defaults(func=run_subtool)
@@ -312,7 +431,7 @@ def main():
                              dest='theme_bw',
                              default=False,
                              action='store_true',
-                             help="Use the ggplot2 black and white theme.")
+                             help="Use a black and white theme.")
 
     parser_squiggle.set_defaults(func=run_subtool)
 
@@ -347,12 +466,7 @@ def main():
                              dest='theme_bw',
                              default=False,
                              action='store_true',
-                             help="Use the ggplot2 black and white theme.")
-    parser_yield_plot.add_argument('--extrapolate',
-                             dest='extrapolate',
-                             metavar='INTEGER',
-                             default=0,
-                             help="Fit a curve and extrapolate to n hours")
+                             help="Use a black and white theme.")
     parser_yield_plot.add_argument('--skip',
                              dest='skip',
                              metavar='INTEGER',
@@ -364,7 +478,7 @@ def main():
                              metavar='STRING',
                              help='Save the data frame used to construct plot to a file.',
                              default=None)
-                             
+
     parser_yield_plot.set_defaults(func=run_subtool)
 
     ############
@@ -386,8 +500,26 @@ def main():
                              help='The type of plot to generate',
                              default='read_count')
 
-
     parser_occupancy.set_defaults(func=run_subtool)
+    
+    ##########
+    # organise
+    ##########
+
+    parser_organise = subparsers.add_parser('organise',
+                               help='Move FAST5 files into a useful folder hierarchy')
+    parser_organise.add_argument('files', metavar='FILES', nargs='+',
+                               help='The input FAST5 files.')
+    parser_organise.add_argument('dest',
+                               metavar='STRING',
+                               help='The destination directory.')
+    parser_organise.add_argument('--copy',
+                               default=False,
+                               action='store_true',
+                               dest='copy', 
+                               help='Make a copy of files instead of moving')
+
+    parser_organise.set_defaults(func=run_subtool)
 
     #######################################################
     # parse the args and call the selected function
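
The new subcommands above (metadata, index, qualpos, organise) follow the same wiring as the existing ones: each subparser stores a callable via set_defaults(func=run_subtool), and the driver later calls args.func(parser, args), which presumably hands off to the matching module's run(). A minimal, self-contained sketch of that dispatch pattern; report_metadata() here is a made-up stand-in, not the real run_subtool:

    # Sketch of the argparse subcommand dispatch used in poretools_main.py.
    # report_metadata() is a placeholder for run_subtool -> metadata.run().
    import argparse

    def report_metadata(parser, args):
        print "metadata requested for %d file(s), read-level=%s" % (len(args.files), args.read)

    def main():
        parser = argparse.ArgumentParser(prog='poretools-sketch')
        subparsers = parser.add_subparsers(dest='command')

        p_meta = subparsers.add_parser('metadata', help='Run metadata')
        p_meta.add_argument('files', metavar='FILES', nargs='+')
        p_meta.add_argument('--read', default=False, action='store_true')
        # the subparser remembers which function to run for this command
        p_meta.set_defaults(func=report_metadata)

        args = parser.parse_args()
        args.func(parser, args)

    if __name__ == '__main__':
        main()

Invoked as "python sketch.py metadata a.fast5 b.fast5 --read", the sketch prints its placeholder line, which is roughly the route a real "poretools metadata ..." call takes to poretools/metadata.py.
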
diff --git a/poretools/qual_v_pos.py b/poretools/qual_v_pos.py
new file mode 100644
index 0000000..c310b1e
--- /dev/null
+++ b/poretools/qual_v_pos.py
@@ -0,0 +1,67 @@
+import sys
+import Fast5File
+from collections import defaultdict
+import pandas
+import matplotlib.pyplot as plt
+
+#logging
+import logging
+logger = logging.getLogger('poretools')
+logger.setLevel(logging.INFO)
+
+def run(parser, args):
+    """ returns boxplot with qual scores for each bin/position"""
+    qualpos = defaultdict(list)
+    bin_width = args.bin_width
+    
+    for fast5 in Fast5File.Fast5FileSet(args.files):
+        if args.start_time or args.end_time:
+                read_start_time = fast5.get_start_time()
+                read_end_time = fast5.get_end_time()
+                if args.start_time and args.start_time > read_start_time:
+                        fast5.close()
+                        continue
+                if args.end_time and args.end_time < read_end_time:
+                        fast5.close()
+                        continue
+
+        fqs = fast5.get_fastqs(args.type)
+        if args.high_quality:
+                if fast5.get_complement_events_count() <= \
+                   fast5.get_template_events_count():
+                        fast5.close()
+                        continue
+
+        for fq in fqs:
+                if fq is None or len(fq.seq) < args.min_length or len(fq.seq) > args.max_length:			
+                        continue
+
+                ctr = 0
+                for q in fq.qual:
+                    ctr += 1
+                    qualpos[1+int(ctr//bin_width)].append(ord(q)-33)
+
+        fast5.close()
+
+    logger.info("Processing data...")
+    data = [qualpos[e] for e in sorted(qualpos.keys())]
+    logger.info("Constructing box plot...")
+    plt.boxplot(data)
+    xdetail = " (" + str(bin_width) + " bp bins)"
+    plt.xlabel("Bin number in read" + xdetail)
+    plt.ylabel("Quality score")
+    plt.xticks(rotation=65, fontsize=8)
+    if args.saveas is not None:
+            logger.info("Writing plot to file...")
+            plot_file = args.saveas
+            if plot_file.endswith(".pdf") or plot_file.endswith(".jpg"):
+                    plt.savefig(plot_file)
+            else:
+                    logger.error("Unrecognized extension for %s! Try .pdf or .jpg" % (plot_file))
+                    sys.exit()
+
+    else:
+            logger.info("Showing plot...")
+            plt.show()
+
+
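
The heart of qual_v_pos.py is the binning loop: each base's Phred+33 quality character is decoded with ord(q) - 33 and appended to the bucket for its position bin. A standalone sketch of just that step, using a made-up quality string rather than real FAST5 data:

    # Same binning as qual_v_pos.run(); the quality string is invented.
    from collections import defaultdict

    bin_width = 5
    qual = "!!##%%''))++--//1133"   # Phred+33 encoded qualities

    qualpos = defaultdict(list)
    for ctr, q in enumerate(qual, start=1):
        # position ctr maps to bucket 1 + ctr // bin_width, so with
        # bin_width=5 positions 1-4 land in bin 1, 5-9 in bin 2, ...
        qualpos[1 + int(ctr // bin_width)].append(ord(q) - 33)

    for b in sorted(qualpos):
        print b, qualpos[b]

The sorted buckets are exactly what feeds plt.boxplot() above, one box per bin of read positions.
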
diff --git a/poretools/squiggle.py b/poretools/squiggle.py
index 311964f..cd8d667 100644
--- a/poretools/squiggle.py
+++ b/poretools/squiggle.py
@@ -1,8 +1,10 @@
 import os
 import sys
-import rpy2.robjects as robjects
-import rpy2.robjects.lib.ggplot2 as ggplot2
-from rpy2.robjects.packages import importr
+import pandas as pd
+import seaborn as sns
+from matplotlib import rcParams
+rcParams.update({'figure.autolayout': True})
+from matplotlib import pyplot as plt
 
 #logging
 import logging
@@ -11,97 +13,73 @@ logger = logging.getLogger('poretools')
 import Fast5File
 
 def plot_squiggle(args, filename, start_times, mean_signals):
-	"""
-	Use rpy2 to create a squiggle plot of the read
-	"""
-	r = robjects.r
-	r.library("ggplot2")
-	grdevices = importr('grDevices')
-
-	# set t_0 as the first measured time for the read.
-	t_0 = start_times[0]
-	total_time = start_times[-1] - start_times[0]
-	# adjust times to be relative to t_0
-	r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
-	r_mean_signals = robjects.FloatVector(mean_signals)
-	
-	# infer the appropriate number of events given the number of facets
-	num_events = len(r_mean_signals)
-	events_per_facet = (num_events / args.num_facets) + 1
-	# dummy variable to control faceting
-	facet_category = robjects.FloatVector([(i / events_per_facet) + 1 for i in range(len(start_times))])
-
-	# make a data frame of the start times and mean signals
-	d = {'start': r_start_times, 'mean': r_mean_signals, 'cat': facet_category}
-	df = robjects.DataFrame(d)
-
-	gp = ggplot2.ggplot(df)
-	if not args.theme_bw:
-		pp = gp + ggplot2.aes_string(x='start', y='mean') \
-			+ ggplot2.geom_step(size=0.25) \
-			+ ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
-			+ ggplot2.scale_x_continuous('Time (seconds)') \
-			+ ggplot2.scale_y_continuous('Mean signal (picoamps)') \
-			+ ggplot2.ggtitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)) \
-			+ ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
-	else:
-		pp = gp + ggplot2.aes_string(x='start', y='mean') \
-			+ ggplot2.geom_step(size=0.25) \
-			+ ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
-			+ ggplot2.scale_x_continuous('Time (seconds)') \
-			+ ggplot2.scale_y_continuous('Mean signal (picoamps)') \
-			+ ggplot2.ggtitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)) \
-			+ ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)}) \
-			+ ggplot2.theme_bw()
-
-	if args.saveas is not None:
-		plot_file = os.path.basename(filename) + "." + args.saveas
-		if os.path.isfile(plot_file):
-			raise Exception('Cannot create plot for %s: plot file %s already exists' % (filename, plot_file))
-		if args.saveas == "pdf":
-			grdevices.pdf(plot_file, width = 8.5, height = 11)
-		elif args.saveas == "png":
-			grdevices.png(plot_file, width = 8.5, height = 11, 
-				units = "in", res = 300)
-		pp.plot()
-		grdevices.dev_off()
-	else:
-		pp.plot()
-		# keep the plot open until user hits enter
-		print('Type enter to exit.')
-		raw_input()
+    """
+    create a squiggle plot of the read
+    """
+
+    # set t_0 as the first measured time for the read.
+    t_0 = start_times[0]
+    total_time = start_times[-1] - start_times[0]
+    # adjust times to be relative to t_0
+    start_times = [t - t_0 for t in start_times]
+
+    # infer the appropriate number of events given the number of facets
+    num_events = len(mean_signals)
+    events_per_facet = (num_events / args.num_facets) + 1
+    # dummy variable to control faceting
+    facet_category = [(i / events_per_facet) + 1 for i in range(len(start_times))]
+
+    # make a data frame of the start times and mean signals
+    d = {'start': start_times, 'mean': mean_signals, 'cat': facet_category}
+    df = pd.DataFrame(d)
+
+    starts = df.groupby('cat')['start']
+    mins, maxs = list(starts.min()), list(starts.max())
+
+    grid = sns.FacetGrid(df, row="cat", sharex=False, size=8)
+    #plt.gcf().tight_layout()
+    grid.fig.suptitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time))
+    grid.map(plt.step, "start", "mean", marker=',', lw=1.0, where="mid")
+    for i, ax in enumerate(grid.axes.flat):
+        ax.set_xlim(mins[i], maxs[i])
+
+    if args.saveas is not None:
+        plot_file = os.path.basename(filename) + "." + args.saveas
+        plt.savefig(plot_file)
+    else:
+        plt.show()
 
 def do_plot_squiggle(args, fast5):
-	start_times = []
-	mean_signals = []
+    start_times = []
+    mean_signals = []
 
-	for event in fast5.get_template_events():
-		start_times.append(event.start)
-		mean_signals.append(event.mean)
+    for event in fast5.get_template_events():
+        start_times.append(event.start)
+        mean_signals.append(event.mean)
 
-	if start_times:
-		plot_squiggle(args, fast5.filename, start_times, mean_signals)
-	else:
-		logger.warning("Could not extract template events for read: %s.\n" \
-			% fast5.filename)
+    if start_times:
+        plot_squiggle(args, fast5.filename, start_times, mean_signals)
+    else:
+        logger.warning("Could not extract template events for read: %s.\n" \
+                        % fast5.filename)
 
-	fast5.close()
+    fast5.close()
 
 
 def run(parser, args):
 
-	fast5_set = Fast5File.Fast5FileSet(args.files)
-
-	first_fast5 = fast5_set.next()
-	for fast5 in fast5_set:
-		# only create a squiggle plot for multiple reads if saving to file.
-		if args.saveas is None:
-			sys.exit("""Please use --saveas when plotting"""
-					 """ multiple FAST5 files as input.\n""")
-		if first_fast5 is not None:
-			do_plot_squiggle(args, first_fast5)
-			first_fast5 = None
-		do_plot_squiggle(args, fast5)
-
-	if first_fast5 is not None:
-		do_plot_squiggle(args, first_fast5)
+    fast5_set = Fast5File.Fast5FileSet(args.files)
+
+    first_fast5 = fast5_set.next()
+    for fast5 in fast5_set:
+        # only create a squiggle plot for multiple reads if saving to file.
+        if args.saveas is None:
+            sys.exit("""Please use --saveas when plotting"""
+                     """ multiple FAST5 files as input.\n""")
+        if first_fast5 is not None:
+            do_plot_squiggle(args, first_fast5)
+            first_fast5 = None
+        do_plot_squiggle(args, fast5)
+
+    if first_fast5 is not None:
+        do_plot_squiggle(args, first_fast5)
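
The rewrite above replaces the rpy2/ggplot2 facet_wrap with a seaborn FacetGrid keyed on the dummy 'cat' column, then clamps each panel to its own time window, since sharex=False alone does not restrict each facet's x range. A minimal sketch of the same idea on synthetic event data; note it uses the newer height=/aspect= parameters (recent seaborn renamed the size= argument used in the diff), and every number below is invented:

    # Faceted step plot in the style of plot_squiggle(); synthetic events.
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt

    n_events, num_facets = 300, 3
    events_per_facet = (n_events // num_facets) + 1

    df = pd.DataFrame({
        'start': [i * 0.01 for i in range(n_events)],       # seconds from t_0
        'mean':  [60 + (i % 17) for i in range(n_events)],  # fake mean signal
        'cat':   [(i // events_per_facet) + 1 for i in range(n_events)],
    })

    grid = sns.FacetGrid(df, row='cat', sharex=False, height=2, aspect=4)
    grid.map(plt.step, 'start', 'mean', where='mid', lw=1.0)

    # clamp each panel to its own time window, as squiggle.py does
    starts = df.groupby('cat')['start']
    for ax, lo, hi in zip(grid.axes.flat, starts.min(), starts.max()):
        ax.set_xlim(lo, hi)

    plt.savefig('squiggle_sketch.png')
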
diff --git a/poretools/times.py b/poretools/times.py
index 577ec39..d568cc0 100644
--- a/poretools/times.py
+++ b/poretools/times.py
@@ -32,7 +32,7 @@ def run(parser, args):
 			print "\t".join([fast5.get_channel_number(),
 				fast5.filename, 
 				str(read_length),
-				fast5.get_exp_start_time(),
+				str(fast5.get_exp_start_time()),
 				str(start_time), \
 				str(fast5.get_duration()),
 				str(fast5.get_end_time()),
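
For context on the one-line times.py fix: "\t".join() only accepts strings, so wrapping get_exp_start_time() in str() presumably guards against a non-string return value such as an integer timestamp. A trivial sketch with made-up values:

    # join() raises TypeError if any item is not a string, hence the str().
    exp_start_time = 1457116543               # invented epoch timestamp
    fields = ["120", "read240.fast5", "8042", str(exp_start_time)]
    print "\t".join(fields)
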
diff --git a/poretools/version.py b/poretools/version.py
index b72a06e..d07e93f 100644
--- a/poretools/version.py
+++ b/poretools/version.py
@@ -1,2 +1,2 @@
-__version__="0.5.1"
+__version__ = "0.6.0"
 
diff --git a/poretools/windows.py b/poretools/windows.py
deleted file mode 100644
index f63e343..0000000
--- a/poretools/windows.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import _winreg
-import os
-
-current_version = None
-
-try:
-	key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, "SOFTWARE\\R-core\\R")
-except Exception:
-	key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, "SOFTWARE\\R-core\\R")
-
-version = _winreg.QueryValueEx(key, "Current Version")[0]
-install_path = _winreg.QueryValueEx(key, "InstallPath")[0]
-
-os.environ['R_HOME'] = install_path
-os.environ['R_USER'] = os.environ['HOMEPATH'] + '\\Documents'
-
-print "Setting R_HOME to %s" % (install_path,)
-print "Setting R_USER to %s" % (os.environ['R_USER'])
-
diff --git a/poretools/yield_plot.py b/poretools/yield_plot.py
index 26ce09e..af4b69b 100644
--- a/poretools/yield_plot.py
+++ b/poretools/yield_plot.py
@@ -1,8 +1,11 @@
 import Fast5File
-from time import strftime, localtime
-import rpy2.robjects.lib.ggplot2 as ggplot2
-import rpy2.robjects as robjects
-from rpy2.robjects.packages import importr
+import matplotlib
+#matplotlib.use('Agg') # Must be called before any other matplotlib calls
+from matplotlib import pyplot as plt
+
+import numpy as np
+import pandas as pd
+import seaborn as sns
 
 #logging
 import logging
@@ -10,116 +13,85 @@ logger = logging.getLogger('poretools')
 logger.setLevel(logging.INFO)
 
 def plot_collectors_curve(args, start_times, read_lengths):
-	"""
-	Use rpy2 to create a collectors curve of the run
-	"""
-	r = robjects.r
-	r.library("ggplot2")
-	grdevices = importr('grDevices')
-
-	# set t_0 as the first measured time for the read.
-	t_0 = start_times[0]
-
-	# adjust times to be relative to t_0
-	r_start_times = robjects.FloatVector([float(t - t_0) / float(3600) + 0.00000001 \
-		for t in start_times])
-	r_read_lengths = robjects.IntVector(read_lengths)
-
-	# compute the cumulative based on reads or total base pairs
-	if args.plot_type == 'reads':
-		y_label = "Total reads"
-		cumulative = \
-			r.cumsum(robjects.IntVector([1] * len(start_times)))
-	elif args.plot_type == 'basepairs':
-		y_label = "Total base pairs"
-		cumulative = r.cumsum(r_read_lengths)
-
-	step = args.skip
-	# make a data frame of the lists
-	d = {'start': robjects.FloatVector([r_start_times[n] for n in xrange(0, len(r_start_times), step)]), 
-		'lengths': robjects.IntVector([r_read_lengths[n] for n in xrange(0, len(r_read_lengths), step)]),
-		'cumul': robjects.IntVector([cumulative[n] for n in xrange(0, len(cumulative), step)])}
-	df = robjects.DataFrame(d)
-
-
-	if args.savedf:
-		robjects.r("write.table")(df, file=args.savedf, sep="\t")
-
-	# title
-	total_reads = len(read_lengths)
-	total_bp = sum(read_lengths)
-	plot_title = "Yield: " \
-		+ str(total_reads) + " reads and " \
-		+ str(total_bp) + " base pairs."
-
-	# plot
-	gp = ggplot2.ggplot(df)
-	pp = gp + ggplot2.aes_string(x='start', y='cumul') \
-		+ ggplot2.geom_step(size=2) \
-		+ ggplot2.scale_x_continuous('Time (hours)') \
-		+ ggplot2.scale_y_continuous(y_label) \
-		+ ggplot2.ggtitle(plot_title)
-
-        # extrapolation
-	if args.extrapolate:
-		start = robjects.ListVector({'a': 1, 'b': 1})
-                pp = pp + ggplot2.stat_smooth(fullrange='TRUE', method='nls',
-                                              formula='y~a*I((x*3600)^b)',
-                                              se='FALSE', start=start) \
-                        + ggplot2.xlim(0, float(args.extrapolate))
-
-	if args.theme_bw:
-		pp = pp + ggplot2.theme_bw()	
-
-	if args.saveas is not None:
-		plot_file = args.saveas
-		if plot_file.endswith(".pdf"):
-			grdevices.pdf(plot_file, width = 8.5, height = 8.5)
-		elif plot_file.endswith(".png"):
-			grdevices.png(plot_file, width = 8.5, height = 8.5, 
-				units = "in", res = 300)
-		else:
-			logger.error("Unrecognized extension for %s!" % (plot_file))
-			sys.exit()
-
-		pp.plot()
-		grdevices.dev_off()
-	else:
-		pp.plot()
-		# keep the plot open until user hits enter
-		print('Type enter to exit.')
-		raw_input()
+        """
+        Plot a collector's curve of the run.
+        """
+        # set t_0 as the first measured time for the read.
+        t_0 = start_times[0]
+
+        # adjust times to be relative to t_0
+        start_times = [float(t - t_0) / float(3600) + 0.00000001 for t in start_times]
+
+        # compute the cumulative based on reads or total base pairs
+        if args.plot_type == 'reads':
+                y_label = "Total reads"
+                cumulative = np.cumsum([1] * len(start_times))
+        elif args.plot_type == 'basepairs':
+                y_label = "Total base pairs"
+                cumulative = np.cumsum(read_lengths)
+
+        step = args.skip
+        # make a data frame of the lists
+        d = {'start': [start_times[n] for n in xrange(0, len(start_times), step)],
+             'lengths': [read_lengths[n] for n in xrange(0, len(read_lengths), step)],
+             'cumul': [cumulative[n] for n in xrange(0, len(cumulative), step)]}
+        df = pd.DataFrame(d)
+
+        if args.savedf:
+            df.to_csv(args.savedf, sep="\t")
+
+        # title
+        total_reads = len(read_lengths)
+        total_bp = sum(read_lengths)
+        plot_title = "Yield: " \
+                + str(total_reads) + " reads and " \
+                + str(total_bp) + " base pairs."
+
+        if args.theme_bw:
+            sns.set_style("whitegrid")
+
+        # plot
+        plt.plot(df['start'], df['cumul'])
+        plt.xlabel('Time (hours)')
+        plt.ylabel(y_label)
+        plt.title(plot_title)
+
+        if args.saveas is not None:
+            plot_file = args.saveas
+            plt.savefig(plot_file, figsize=(8.5, 8.5))
+        else:
+            plt.show()
 
 def run(parser, args):
-	
-	start_times = []
-	read_lengths = []
-	files_processed = 0
-	for fast5 in Fast5File.Fast5FileSet(args.files):
-		if fast5.is_open:
-			
-			fq = fast5.get_fastq()
-			
-			start_time = fast5.get_start_time()
-			if start_time is None:
-				logger.warning("No start time for %s!" % (fast5.filename))
-				fast5.close()
-				continue
-
-			start_times.append(start_time)
-			if fq is not None:
-				read_lengths.append(len(fq.seq))
-			else:
-				read_lengths.append(0)
-			fast5.close()
-
-		files_processed += 1
-		if files_processed % 100 == 0:
-			logger.info("%d files processed." % files_processed)
-	
-
-
-	# sort the data by start time
-	start_times, read_lengths = (list(t) for t in zip(*sorted(zip(start_times, read_lengths))))
-	plot_collectors_curve(args, start_times, read_lengths)
+
+        start_times = []
+        read_lengths = []
+        files_processed = 0
+        for fast5 in Fast5File.Fast5FileSet(args.files):
+                if fast5.is_open:
+
+                        fq = fast5.get_fastq()
+
+                        start_time = fast5.get_start_time()
+                        if start_time is None:
+                                logger.warning("No start time for %s!" % (fast5.filename))
+                                fast5.close()
+                                continue
+
+                        start_times.append(start_time)
+                        if fq is not None:
+                                read_lengths.append(len(fq.seq))
+                        else:
+                                read_lengths.append(0)
+                        fast5.close()
+
+                files_processed += 1
+                if files_processed % 100 == 0:
+                        logger.info("%d files processed." % files_processed)
+
+
+
+        # sort the data by start time
+        start_times, read_lengths = (list(t) for t in zip(*sorted(zip(start_times, read_lengths))))
+        plot_collectors_curve(args, start_times, read_lengths)
 
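
The rewritten yield plot reduces to a cumulative sum over reads sorted by start time: a running read count is the cumulative sum of ones, and a running base-pair total is the cumulative sum of the read lengths. A small sketch with invented reads:

    # Cumulative series behind the yield plot; all values are made up.
    import numpy as np

    start_times  = [12.0, 40.0, 95.0, 130.0]   # seconds, already sorted
    read_lengths = [900, 1500, 750, 2100]

    hours = [(t - start_times[0]) / 3600.0 for t in start_times]

    cumulative_reads = np.cumsum([1] * len(start_times))   # 1, 2, 3, 4
    cumulative_bp    = np.cumsum(read_lengths)             # 900, 2400, 3150, 5250

    for h, r, bp in zip(hours, cumulative_reads, cumulative_bp):
        print "%.5f h\t%d reads\t%d bp" % (h, r, bp)
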
diff --git a/requirements.txt b/requirements.txt
index 5fba58b..6a8e9e4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,4 @@
-h5py>=2.0.0
-rpy2>=2.4.2
\ No newline at end of file
+h5py>=2.2.0
+matplotlib
+seaborn
+pandas
diff --git a/setup.py b/setup.py
index 89bdd6a..e5479f0 100644
--- a/setup.py
+++ b/setup.py
@@ -1,41 +1,44 @@
 import os
-import sys
 from setuptools import setup
 
 version_py = os.path.join(os.path.dirname(__file__), 'poretools', 'version.py')
-version = open(version_py).read().strip().split('=')[-1].replace('"','')
+version = open(version_py).read().strip().split('=')[-1].replace('"','').strip()
+print version
 long_description = """
 ``poretools`` is a toolset for working with nanopore sequencing data
 """
 
-with open("requirements.txt", "r") as f:
+HERE = os.path.dirname(__file__)
+
+with open(os.path.join(HERE, "requirements.txt"), "r") as f:
     install_requires = [x.strip() for x in f.readlines()]
 
 setup(
-        name="poretools",
-        version=version,
-        install_requires=install_requires,
-        requires = ['python (>=2.7, <3.0)'],
-        packages=['poretools',
-                  'poretools.scripts'],
-        author="Nick Loman and Aaron Quinlan",
-        description='A toolset for working with nanopore sequencing data',
-        long_description=long_description,
-        url="http://poretools.readthedocs.org",
-        package_dir = {'poretools': "poretools"},
-        package_data = {'poretools': []},
-        zip_safe = False,
-        include_package_data=True,
-        #scripts = ['poretools/scripts/poretools'],
-        entry_points = {
-            'console_scripts' : [
-                 'poretools = poretools.poretools_main:main', 
-            ],
-        },  
-        author_email="arq5x at virginia.edu",
-        classifiers=[
-            'Development Status :: 4 - Beta',
-            'Intended Audience :: Science/Research',
-            'License :: OSI Approved :: GNU General Public License (GPL)',
-            'Topic :: Scientific/Engineering :: Bio-Informatics']
+    name="poretools",
+    version=version,
+    install_requires=install_requires,
+    requires=['python (>=2.7, <3.0)'],
+    packages=['poretools',
+              'poretools.scripts'],
+    author="Nick Loman and Aaron Quinlan",
+    description='A toolset for working with nanopore sequencing data',
+    long_description=long_description,
+    url="http://poretools.readthedocs.org",
+    package_dir={'poretools': "poretools"},
+    package_data={'poretools': []},
+    zip_safe=False,
+    include_package_data=True,
+    #scripts=['poretools/scripts/poretools'],
+    entry_points={
+        'console_scripts': [
+            'poretools=poretools.poretools_main:main',
+        ],
+    },
+    author_email="arq5x at virginia.edu",
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: GNU General Public License (GPL)',
+        'Topic :: Scientific/Engineering :: Bio-Informatics'
+        ]
     )
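
setup.py scrapes the version string out of poretools/version.py instead of importing it; the trailing .strip() added above is what copes with the spaces now surrounding '=' in version.py. A one-liner sketch of that parsing chain:

    # Same split/replace/strip chain as setup.py, applied to the new line.
    line = '__version__ = "0.6.0"\n'
    version = line.strip().split('=')[-1].replace('"', '').strip()
    print version   # -> 0.6.0
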
diff --git a/test_data/2016_3_4_3507_1_ch120_read240_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read240_strand.fast5
new file mode 100644
index 0000000..45050cb
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read240_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read353_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read353_strand.fast5
new file mode 100644
index 0000000..2c20213
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read353_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read415_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read415_strand.fast5
new file mode 100644
index 0000000..0f8ec86
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read415_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read418_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read418_strand.fast5
new file mode 100644
index 0000000..8d07c79
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read418_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read433_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read433_strand.fast5
new file mode 100644
index 0000000..864ec1c
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read433_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read443_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read443_strand.fast5
new file mode 100644
index 0000000..3b15e66
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read443_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read505_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read505_strand.fast5
new file mode 100644
index 0000000..26f3007
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read505_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5
new file mode 100644
index 0000000..416be95
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read542_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read542_strand.fast5
new file mode 100644
index 0000000..9e630e0
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read542_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read586_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read586_strand.fast5
new file mode 100644
index 0000000..051d08c
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read586_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read635_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read635_strand.fast5
new file mode 100644
index 0000000..cfc0ce9
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read635_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read706_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read706_strand.fast5
new file mode 100644
index 0000000..6840889
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read706_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read83_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read83_strand.fast5
new file mode 100644
index 0000000..ce8cd1c
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read83_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch120_read89_strand.fast5 b/test_data/2016_3_4_3507_1_ch120_read89_strand.fast5
new file mode 100644
index 0000000..198321d
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch120_read89_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read1066_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read1066_strand.fast5
new file mode 100644
index 0000000..c877874
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read1066_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read1079_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read1079_strand.fast5
new file mode 100644
index 0000000..d7317f6
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read1079_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read1169_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read1169_strand.fast5
new file mode 100644
index 0000000..c9ee678
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read1169_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read1250_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read1250_strand.fast5
new file mode 100644
index 0000000..537b903
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read1250_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read1377_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read1377_strand.fast5
new file mode 100644
index 0000000..be4ca01
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read1377_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read1387_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read1387_strand.fast5
new file mode 100644
index 0000000..c456363
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read1387_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read160_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read160_strand.fast5
new file mode 100644
index 0000000..3ae236d
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read160_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read217_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read217_strand.fast5
new file mode 100644
index 0000000..426089b
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read217_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read223_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read223_strand.fast5
new file mode 100644
index 0000000..2b8d202
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read223_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read249_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read249_strand.fast5
new file mode 100644
index 0000000..9ea26cd
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read249_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read324_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read324_strand.fast5
new file mode 100644
index 0000000..b5234d4
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read324_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read326_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read326_strand.fast5
new file mode 100644
index 0000000..aa96d5b
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read326_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read382_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read382_strand.fast5
new file mode 100644
index 0000000..a6517c9
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read382_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read42_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read42_strand.fast5
new file mode 100644
index 0000000..368bf0b
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read42_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read501_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read501_strand.fast5
new file mode 100644
index 0000000..9eb7352
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read501_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read562_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read562_strand.fast5
new file mode 100644
index 0000000..7708a02
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read562_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read601_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read601_strand.fast5
new file mode 100644
index 0000000..b0938e1
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read601_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read618_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read618_strand.fast5
new file mode 100644
index 0000000..0f74969
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read618_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read700_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read700_strand.fast5
new file mode 100644
index 0000000..a7fff23
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read700_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read743_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read743_strand.fast5
new file mode 100644
index 0000000..459de01
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read743_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read831_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read831_strand.fast5
new file mode 100644
index 0000000..3d7c03d
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read831_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read833_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read833_strand.fast5
new file mode 100644
index 0000000..d3e40e9
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read833_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read843_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read843_strand.fast5
new file mode 100644
index 0000000..b2f78d7
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read843_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read857_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read857_strand.fast5
new file mode 100644
index 0000000..7774e11
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read857_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read899_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read899_strand.fast5
new file mode 100644
index 0000000..77ecc13
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read899_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read914_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read914_strand.fast5
new file mode 100644
index 0000000..b6ed4f6
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read914_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read940_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read940_strand.fast5
new file mode 100644
index 0000000..8ad44b1
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read940_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch126_read969_strand.fast5 b/test_data/2016_3_4_3507_1_ch126_read969_strand.fast5
new file mode 100644
index 0000000..d29a40c
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch126_read969_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read204_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read204_strand.fast5
new file mode 100644
index 0000000..b29c909
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read204_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read270_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read270_strand.fast5
new file mode 100644
index 0000000..65b5b8c
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read270_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read361_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read361_strand.fast5
new file mode 100644
index 0000000..23a3316
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read361_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read365_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read365_strand.fast5
new file mode 100644
index 0000000..6ecd4d2
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read365_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read376_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read376_strand.fast5
new file mode 100644
index 0000000..6717a56
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read376_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read384_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read384_strand.fast5
new file mode 100644
index 0000000..d40776b
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read384_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read404_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read404_strand.fast5
new file mode 100644
index 0000000..1a3a3be
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read404_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read422_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read422_strand.fast5
new file mode 100644
index 0000000..01bd992
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read422_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read430_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read430_strand.fast5
new file mode 100644
index 0000000..a0672d1
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read430_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read503_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read503_strand.fast5
new file mode 100644
index 0000000..bee46f3
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read503_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read521_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read521_strand.fast5
new file mode 100644
index 0000000..33aec9f
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read521_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read635_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read635_strand.fast5
new file mode 100644
index 0000000..83016b9
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read635_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read647_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read647_strand.fast5
new file mode 100644
index 0000000..1960f98
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read647_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read723_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read723_strand.fast5
new file mode 100644
index 0000000..e294436
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read723_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read753_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read753_strand.fast5
new file mode 100644
index 0000000..657b234
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read753_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read763_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read763_strand.fast5
new file mode 100644
index 0000000..54c1567
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read763_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read783_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read783_strand.fast5
new file mode 100644
index 0000000..e14a99d
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read783_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read790_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read790_strand.fast5
new file mode 100644
index 0000000..7873cfb
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read790_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch128_read95_strand.fast5 b/test_data/2016_3_4_3507_1_ch128_read95_strand.fast5
new file mode 100644
index 0000000..b9fb14a
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch128_read95_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5 b/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5
new file mode 100644
index 0000000..00a4152
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5 b/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5
new file mode 100644
index 0000000..3ff6cef
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch13_read1150_strand.fast5 b/test_data/2016_3_4_3507_1_ch13_read1150_strand.fast5
new file mode 100644
index 0000000..07f992e
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch13_read1150_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch13_read1404_strand.fast5 b/test_data/2016_3_4_3507_1_ch13_read1404_strand.fast5
new file mode 100644
index 0000000..7511932
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch13_read1404_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch13_read1414_strand.fast5 b/test_data/2016_3_4_3507_1_ch13_read1414_strand.fast5
new file mode 100644
index 0000000..9f13994
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch13_read1414_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch13_read1456_strand.fast5 b/test_data/2016_3_4_3507_1_ch13_read1456_strand.fast5
new file mode 100644
index 0000000..90eb0fa
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch13_read1456_strand.fast5 differ
diff --git a/test_data/2016_3_4_3507_1_ch13_read1474_strand.fast5 b/test_data/2016_3_4_3507_1_ch13_read1474_strand.fast5
new file mode 100644
index 0000000..7719a9f
Binary files /dev/null and b/test_data/2016_3_4_3507_1_ch13_read1474_strand.fast5 differ
diff --git a/test_data/COLLES_L160693_20160728_FNFAB23794_MN17350_sequencing_run_E_coli_K12_1D_R9_SpotON_41280_ch52_read58_strand.fast5 b/test_data/COLLES_L160693_20160728_FNFAB23794_MN17350_sequencing_run_E_coli_K12_1D_R9_SpotON_41280_ch52_read58_strand.fast5
new file mode 100644
index 0000000..e139094
Binary files /dev/null and b/test_data/COLLES_L160693_20160728_FNFAB23794_MN17350_sequencing_run_E_coli_K12_1D_R9_SpotON_41280_ch52_read58_strand.fast5 differ

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/poretools.git


