[med-svn] [python-bd2k] 01/02: New upstream version 1.14~alpha1.37

Steffen Möller moeller at moszumanska.debian.org
Sat Jan 14 09:08:53 UTC 2017


This is an automated email from the git hooks/post-receive script.

moeller pushed a commit to branch master
in repository python-bd2k.

commit 2c0adee92cdcdeb23973889545337a0e586861dc
Author: Steffen Moeller <moeller at debian.org>
Date:   Sat Jan 14 09:30:26 2017 +0100

    New upstream version 1.14~alpha1.37
---
 PKG-INFO                                           |  10 +
 setup.cfg                                          |   9 +
 setup.py                                           |  51 +++++
 src/bd2k/__init__.py                               |   1 +
 src/bd2k/util/__init__.py                          | 227 ++++++++++++++++++++
 src/bd2k/util/collections.py                       | 162 ++++++++++++++
 src/bd2k/util/d32.py                               | 117 ++++++++++
 src/bd2k/util/d64.py                               | 117 ++++++++++
 src/bd2k/util/ec2/__init__.py                      |   0
 src/bd2k/util/ec2/credentials.py                   | 155 ++++++++++++++
 src/bd2k/util/ec2/test/__init__.py                 |   0
 src/bd2k/util/ec2/test/test_credentials.py         |  83 ++++++++
 src/bd2k/util/exceptions.py                        |  81 +++++++
 src/bd2k/util/expando.py                           | 117 ++++++++++
 src/bd2k/util/files.py                             | 106 +++++++++
 src/bd2k/util/fnmatch.py                           | 149 +++++++++++++
 src/bd2k/util/hashes.py                            | 132 ++++++++++++
 src/bd2k/util/humanize.py                          | 131 ++++++++++++
 src/bd2k/util/iterables.py                         | 169 +++++++++++++++
 src/bd2k/util/lockfile.py                          |  36 ++++
 src/bd2k/util/logging.py                           |  29 +++
 src/bd2k/util/objects.py                           | 217 +++++++++++++++++++
 src/bd2k/util/processes.py                         |  35 +++
 src/bd2k/util/retry.py                             | 138 ++++++++++++
 src/bd2k/util/shell.py                             |  28 +++
 src/bd2k/util/strings.py                           | 129 +++++++++++
 src/bd2k/util/test/__init__.py                     |   0
 src/bd2k/util/test/test_d32.py                     |  30 +++
 src/bd2k/util/test/test_d64.py                     |  30 +++
 src/bd2k/util/test/test_files.py                   |  32 +++
 src/bd2k/util/test/test_panic.py                   |  80 +++++++
 src/bd2k/util/test/test_strings.py                 |  13 ++
 src/bd2k/util/threading.py                         |  81 +++++++
 src/bd2k/util/throttle.py                          | 203 ++++++++++++++++++
 src/bd2k/util/xml/__init__.py                      |   1 +
 src/bd2k/util/xml/builder.py                       | 236 +++++++++++++++++++++
 src/bd2k_python_lib.egg-info/PKG-INFO              |  10 +
 src/bd2k_python_lib.egg-info/SOURCES.txt           |  41 ++++
 src/bd2k_python_lib.egg-info/dependency_links.txt  |   1 +
 .../namespace_packages.txt                         |   1 +
 src/bd2k_python_lib.egg-info/pbr.json              |   1 +
 src/bd2k_python_lib.egg-info/top_level.txt         |   1 +
 42 files changed, 3190 insertions(+)

diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..5786e6e
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: bd2k-python-lib
+Version: 1.14a1.dev37
+Summary: The BD2K Python module kitchen sink
+Home-page: https://github.com/BD2KGenomics/bd2k-python-lib
+Author: Hannes Schmidt
+Author-email: hannes at ucsc.edu
+License: UNKNOWN
+Description: UNKNOWN
+Platform: UNKNOWN
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..c702222
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,9 @@
+[pytest]
+python_files = *.py
+addopts = --doctest-modules
+
+[egg_info]
+tag_build = .dev37
+tag_date = 0
+tag_svn_revision = 0
+
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..dde5441
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,51 @@
+import sys
+
+assert sys.version_info >= (2, 6)
+
+from setuptools import setup, find_packages
+
+kwargs = dict(
+    name="bd2k-python-lib",
+    version="1.14a1",
+
+    author='Hannes Schmidt',
+    author_email='hannes at ucsc.edu',
+    url='https://github.com/BD2KGenomics/bd2k-python-lib',
+    description='The BD2K Python module kitchen sink',
+
+    package_dir={ '': 'src' },
+    packages=find_packages( 'src' ),
+    install_requires=[ ],
+    tests_require=[
+        'pytest==2.7.2',
+        'mock==1.0.1',
+        'lockfile==0.11.0',
+        'boto==2.38.0' ],
+    namespace_packages=[ 'bd2k' ] )
+
+from setuptools.command.test import test as TestCommand
+
+
+class PyTest( TestCommand ):
+    user_options = [ ('pytest-args=', 'a', "Arguments to pass to py.test") ]
+
+    def initialize_options( self ):
+        TestCommand.initialize_options( self )
+        self.pytest_args = [ ]
+
+    def finalize_options( self ):
+        TestCommand.finalize_options( self )
+        self.test_args = [ ]
+        self.test_suite = True
+
+    def run_tests( self ):
+        import pytest
+        # Sanitize command line arguments to avoid confusing Toil code attempting to parse them
+        sys.argv[ 1: ] = [ ]
+        errno = pytest.main( self.pytest_args )
+        sys.exit( errno )
+
+
+kwargs[ 'cmdclass' ] = { 'test': PyTest }
+
+setup( **kwargs )
diff --git a/src/bd2k/__init__.py b/src/bd2k/__init__.py
new file mode 100644
index 0000000..1148131
--- /dev/null
+++ b/src/bd2k/__init__.py
@@ -0,0 +1 @@
+__import__( 'pkg_resources' ).declare_namespace( __name__ )
\ No newline at end of file
diff --git a/src/bd2k/util/__init__.py b/src/bd2k/util/__init__.py
new file mode 100644
index 0000000..9c240ae
--- /dev/null
+++ b/src/bd2k/util/__init__.py
@@ -0,0 +1,227 @@
+from __future__ import absolute_import
+
+import datetime
+import grp
+import pwd
+from functools import wraps
+
+from threading import Lock
+
+import re
+
+
+def uid_to_name( uid ):
+    return pwd.getpwuid( uid ).pw_name
+
+
+def gid_to_name( gid ):
+    return grp.getgrgid( gid ).gr_name
+
+
+def name_to_uid( name ):
+    return pwd.getpwnam( name ).pw_uid
+
+
+def name_to_gid( name ):
+    return grp.getgrnam( name ).gr_gid
+
+
+def memoize( f ):
+    """
+    A decorator that memoizes a function result based on its parameters. For example, this can be
+    used in place of lazy initialization. If the decorating function is invoked by multiple
+    threads, the decorated function may be called more than once with the same arguments.
+    """
+
+    # TODO: Recommend that f's arguments be immutable
+
+    memory = { }
+
+    @wraps( f )
+    def new_f( *args ):
+        try:
+            return memory[ args ]
+        except KeyError:
+            r = f( *args )
+            memory[ args ] = r
+            return r
+
+    return new_f
+
+
+def sync_memoize( f ):
+    """
+    Like memoize, but guarantees that decorated function is only called once, even when multiple
+    threads are calling the decorating function with the same parameters.
+    """
+
+    # TODO: Think about an f that is recursive
+
+    memory = { }
+    lock = Lock( )
+
+    @wraps( f )
+    def new_f( *args ):
+        try:
+            return memory[ args ]
+        except KeyError:
+            # on cache misses, retry with lock held
+            with lock:
+                try:
+                    return memory[ args ]
+                except KeyError:
+                    r = f( *args )
+                    memory[ args ] = r
+                    return r
+
+    return new_f
+
+
+def properties( obj ):
+    """
+    Returns a dictionary with one entry per attribute of the given object. The key being the
+    attribute name and the value being the attribute value. Attributes starting in two
+    underscores will be ignored. This function is an alternative to vars() which only returns
+    instance variables, not properties. Note that methods are returned as well but the value in
+    the dictionary is the method, not the return value of the method.
+
+    >>> class Foo():
+    ...     def __init__(self):
+    ...         self.var = 1
+    ...     @property
+    ...     def prop(self):
+    ...         return self.var + 1
+    ...     def meth(self):
+    ...         return self.var + 2
+    >>> foo = Foo()
+    >>> properties( foo ) == { 'var':1, 'prop':2, 'meth':foo.meth }
+    True
+
+    Note how the entry for prop is not a bound method (i.e. the getter) but the return value of
+    that getter.
+    """
+    return dict( (attr, getattr( obj, attr ))
+                     for attr in dir( obj )
+                     if not attr.startswith( '__' ) )
+
+
+def ilen( it ):
+    """
+    Return the number of elements in an iterable
+
+    >>> ilen(xrange(0,100))
+    100
+    """
+    return sum( 1 for _ in it )
+
+
+def rfc3339_datetime_re( anchor=True ):
+    """
+    Returns a regular expression for syntactic validation of ISO date-times, RFC-3339 date-times
+    to be precise.
+
+
+    >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39Z') )
+    True
+
+    >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39.123Z') )
+    True
+
+    >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39-08:00') )
+    True
+
+    >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39.123+11:00') )
+    True
+
+    It anchors the matching to the beginning and end of a string by default ...
+
+    >>> bool( rfc3339_datetime_re().search('bla 2013-11-06T15:56:39Z bla') )
+    False
+
+    ... but that can be changed:
+
+    >>> bool( rfc3339_datetime_re( anchor=False ).search('bla 2013-11-06T15:56:39Z bla') )
+    True
+
+    >>> bool( rfc3339_datetime_re( anchor=False ).match('2013-11-06T15:56:39Z bla') )
+    True
+
+    Keep in mind that re.match() always anchors at the beginning:
+
+    >>> bool( rfc3339_datetime_re( anchor=False ).match('bla 2013-11-06T15:56:39Z') )
+    False
+
+    It does not check whether the actual value is a semantically valid datetime:
+
+    >>> bool( rfc3339_datetime_re().match('9999-99-99T99:99:99.9-99:99') )
+    True
+
+    If the regular expression matches, each component of the matching value will be exposed as a
+    captured group in the match object.
+
+    >>> rfc3339_datetime_re().match('2013-11-06T15:56:39Z').groups()
+    ('2013', '11', '06', '15', '56', '39', None, 'Z')
+    >>> rfc3339_datetime_re().match('2013-11-06T15:56:39.123Z').groups()
+    ('2013', '11', '06', '15', '56', '39', '123', 'Z')
+    >>> rfc3339_datetime_re().match('2013-11-06T15:56:39.123-08:30').groups()
+    ('2013', '11', '06', '15', '56', '39', '123', '-08:30')
+    """
+    return re.compile(
+        ('^' if anchor else '') +
+        '(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?(Z|[+-]\d{2}:\d{2})' +
+        ('$' if anchor else '') )
+
+
+_rfc3339_datetime_re = rfc3339_datetime_re( )
+
+
+def parse_iso_utc( s ):
+    """
+    Parses an ISO time with a hard-coded Z for zulu-time (UTC) at the end. Other timezones are
+    not supported.
+
+    :param str s: the ISO-formatted time
+
+    :rtype: datetime.datetime
+
+    :return: a timezone-naive datetime object
+
+    >>> parse_iso_utc('2016-04-27T00:28:04.000Z')
+    datetime.datetime(2016, 4, 27, 0, 28, 4)
+    >>> parse_iso_utc('2016-04-27T00:28:04Z')
+    datetime.datetime(2016, 4, 27, 0, 28, 4)
+    >>> parse_iso_utc('2016-04-27T00:28:04X')
+    Traceback (most recent call last):
+    ...
+    ValueError: Not a valid ISO datetime in UTC: 2016-04-27T00:28:04X
+    """
+    m = _rfc3339_datetime_re.match( s )
+    if not m:
+        raise ValueError( 'Not a valid ISO datetime in UTC: ' + s )
+    else:
+        fmt = '%Y-%m-%dT%H:%M:%S' + ('.%f' if m.group( 7 ) else '') + 'Z'
+        return datetime.datetime.strptime( s, fmt )
+
+
+def strict_bool( s ):
+    """
+    Variant of bool() that only accepts two possible string values.
+    """
+    if s == 'True':
+        return True
+    elif s == 'False':
+        return False
+    else:
+        raise ValueError( s )
+
+
+def less_strict_bool( x ):
+    """
+    Idempotent and None-safe version of strict_bool.
+    """
+    if x is None:
+        return False
+    elif x is True or x is False:
+        return x
+    else:
+        return strict_bool( x )
diff --git a/src/bd2k/util/collections.py b/src/bd2k/util/collections.py
new file mode 100644
index 0000000..175d87d
--- /dev/null
+++ b/src/bd2k/util/collections.py
@@ -0,0 +1,162 @@
+from __future__ import absolute_import
+
+import collections
+from itertools import dropwhile
+
+
+class OrderedSet( collections.MutableSet ):
+    """
+    An ordered set from http://code.activestate.com/recipes/576694/
+
+    Note: May be leaky, may have O(N) lookup by index
+
+    TODO: Consider https://github.com/LuminosoInsight/ordered-set which uses a native Python list
+    instead of a linked list
+
+    >>> s = OrderedSet( 'abracadaba' )
+    >>> s
+    OrderedSet(['a', 'b', 'r', 'c', 'd'])
+    >>> t = OrderedSet( 'simsalabim' )
+    >>> t
+    OrderedSet(['s', 'i', 'm', 'a', 'l', 'b'])
+    >>> s | t
+    OrderedSet(['a', 'b', 'r', 'c', 'd', 's', 'i', 'm', 'l'])
+    >>> s & t
+    OrderedSet(['a', 'b'])
+    >>> s - t
+    OrderedSet(['r', 'c', 'd'])
+    >>> t - s
+    OrderedSet(['s', 'i', 'm', 'l'])
+    >>> OrderedSet( reversed( s ) )
+    OrderedSet(['d', 'c', 'r', 'b', 'a'])
+    >>> s.pop()
+    'd'
+    >>> s
+    OrderedSet(['a', 'b', 'r', 'c'])
+    >>> s.discard('b')
+    >>> s
+    OrderedSet(['a', 'r', 'c'])
+    >>> s.pop( last=False )
+    'a'
+    >>> s
+    OrderedSet(['r', 'c'])
+    >>> s.union( t )
+    >>> s
+    OrderedSet(['r', 'c', 's', 'i', 'm', 'a', 'l', 'b'])
+
+    >>> s = OrderedSet()
+    >>> s
+    OrderedSet()
+    >>> s.pop()
+    Traceback (most recent call last):
+    ...
+    KeyError: 'set is empty'
+    >>> OrderedSet( "aba" ) == OrderedSet( "ab" )
+    True
+    >>> OrderedSet( "aba" ) == OrderedSet( "abc" )
+    False
+    >>> OrderedSet( "aba" ) == OrderedSet( "ba" )
+    False
+    >>> OrderedSet( "aba" ) == set( "ba" )
+    True
+    """
+
+    def __init__( self, iterable=None ):
+        self.end = end = [ ]
+        end += [ None, end, end ]  # sentinel node for doubly linked list
+        self.map = { }  # key --> [key, prev, next]
+        if iterable is not None:
+            self |= iterable
+
+    def __len__( self ):
+        return len( self.map )
+
+    def __contains__( self, key ):
+        return key in self.map
+
+    def add( self, key ):
+        if key not in self.map:
+            end = self.end
+            curr = end[ 1 ]
+            curr[ 2 ] = end[ 1 ] = self.map[ key ] = [ key, curr, end ]
+
+    def discard( self, key ):
+        if key in self.map:
+            key, prev, next = self.map.pop( key )
+            prev[ 2 ] = next
+            next[ 1 ] = prev
+
+    def __iter__( self ):
+        end = self.end
+        curr = end[ 2 ]
+        while curr is not end:
+            yield curr[ 0 ]
+            curr = curr[ 2 ]
+
+    def __reversed__( self ):
+        end = self.end
+        curr = end[ 1 ]
+        while curr is not end:
+            yield curr[ 0 ]
+            curr = curr[ 1 ]
+
+    def pop( self, last=True ):
+        if not self:
+            raise KeyError( 'set is empty' )
+        key = self.end[ 1 ][ 0 ] if last else self.end[ 2 ][ 0 ]
+        self.discard( key )
+        return key
+
+    def __repr__( self ):
+        if not self:
+            return '%s()' % (self.__class__.__name__,)
+        return '%s(%r)' % (self.__class__.__name__, list( self ))
+
+    def __eq__( self, other ):
+        if isinstance( other, OrderedSet ):
+            return len( self ) == len( other ) and list( self ) == list( other )
+        return set( self ) == set( other )
+
+    def union(self,other):
+        self |= other
+
+
+def rindex( l, v ):
+    """
+    Like l.index(v) but finds last occurrence of value v in sequence l.
+
+    :type l: anything
+
+    >>> rindex( [0], 0 )
+    0
+    >>> rindex( [0,0], 0 )
+    1
+    >>> rindex( [0,1], 0 )
+    0
+    >>> rindex( [0,1,0,1], 0 )
+    2
+    >>> rindex( [0,1,0,1], 1 )
+    3
+    >>> rindex( [0], 1 )
+    Traceback (most recent call last):
+    ...
+    ValueError: 1
+    >>> rindex( [None], None )
+    0
+    >>> rindex( [], None )
+    Traceback (most recent call last):
+    ...
+    ValueError: None
+    >>> rindex( "0101", '0')
+    2
+    >>> rindex( (0,1,0,1), 0 )
+    2
+    >>> rindex( xrange(3), 2 )
+    2
+    """
+    try:
+        n = next( dropwhile( lambda (i, x): v != x, enumerate( reversed( l ), 1 ) ) )[ 0 ]
+    except StopIteration:
+        raise ValueError( v )
+    else:
+        return len( l ) - n
diff --git a/src/bd2k/util/d32.py b/src/bd2k/util/d32.py
new file mode 100644
index 0000000..100eacb
--- /dev/null
+++ b/src/bd2k/util/d32.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2015 Hannes Schmidt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+# and associated documentation files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or
+# substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Inspired by Dominic Tarr's JavaScript at https://github.com/dominictarr/d64
+
+class D32( object ):
+    """
+    Base32 encoding and decoding without padding, and using an arbitrary alphabet.
+    """
+
+    def __init__( self, alphabet ):
+        super( D32, self ).__init__( )
+        self.alphabet = bytearray( alphabet )
+        self.lookup = bytearray( 255 )
+        for i in xrange( 32 ):
+            self.lookup[ self.alphabet[ i ] ] = i
+
+    def encode( self, d ):
+        """
+        >>> encode = standard.encode
+        >>> encode('')
+        ''
+        >>> encode('\\0')
+        '22'
+        >>> encode('\\xff')
+        'zw'
+        >>> encode('\\0\\1\\2\\3\\4')
+        '222k62s6'
+        >>> encode('\\0\\1\\2\\3\\4\\5')
+        '222k62s62o'
+        """
+        m = len( d )
+        n = (m * 8 + 4) / 5
+        padding = 8 - n % 8
+        e = bytearray( n + padding )
+        i, j = 0, 0
+        a = self.alphabet
+
+        while i < m:
+            if m - i < 5:
+                g = bytearray( d[ i: ] + '\0' * (5 - (m - i)) )
+            else:
+                g = bytearray( d[ i:i + 5 ] )
+            # bit              1          2          3
+            # bit   01234567 89012345 67890123 45678901 23456789
+            # byte  00000000 11111111 22222222 33333333 44444444
+            # group 00000111 11222223 33334444 45555566 66677777
+            e[ j + 0 ] = a[ g[ 0 ] >> 3 ]
+            e[ j + 1 ] = a[ g[ 0 ] << 2 & 31 | g[ 1 ] >> 6 ]
+            e[ j + 2 ] = a[ g[ 1 ] >> 1 & 31 ]
+            e[ j + 3 ] = a[ g[ 1 ] << 4 & 31 | g[ 2 ] >> 4 ]
+            e[ j + 4 ] = a[ g[ 2 ] << 1 & 31 | g[ 3 ] >> 7 ]
+            e[ j + 5 ] = a[ g[ 3 ] >> 2 & 31 ]
+            e[ j + 6 ] = a[ g[ 3 ] << 3 & 31 | g[ 4 ] >> 5 ]
+            e[ j + 7 ] = a[ g[ 4 ] & 31 ]
+            j += 8
+            i += 5
+        return str( e[ :-padding ] )
+
+    def decode( self, e ):
+        """
+        >>> decode = standard.decode
+
+        # >>> decode('222k62s62o')
+        # '\\x00\\x01\\x02\\x03\\x04\\x05'
+        # >>> decode('222k62s6')
+        # '\\x00\\x01\\x02\\x03\\x04'
+        >>> decode('zw')
+        '\\xff'
+        """
+        n = len( e )
+        m = n * 5 / 8
+        padding = 5 - m % 5
+        d = bytearray( m + padding )
+        i, j = 0, 0
+        l = self.lookup
+        while j < n:
+            if n - j < 8:
+                g = [ l[ ord( x ) ] for x in e[ j: ] ] + [ 0 ] * (8 - (n - j))
+            else:
+                g = [ l[ ord( x ) ] for x in e[ j:j + 8 ] ]
+            # bit              1          2          3
+            # bit   01234567 89012345 67890123 45678901 23456789
+            # byte  00000000 11111111 22222222 33333333 44444444
+            # group 00000111 11222223 33334444 45555566 66677777
+            d[ i + 0 ] = g[ 0 ] << 3 & 255 | g[ 1 ] >> 2
+            d[ i + 1 ] = g[ 1 ] << 6 & 255 | g[ 2 ] << 1 & 255 | g[ 3 ] >> 4
+            d[ i + 2 ] = g[ 3 ] << 4 & 255 | g[ 4 ] >> 1
+            d[ i + 3 ] = g[ 4 ] << 7 & 255 | g[ 5 ] << 2 & 255 | g[ 6 ] >> 3
+            d[ i + 4 ] = g[ 6 ] << 5 & 255 | g[ 7 ]
+            j += 8
+            i += 5
+        return str( d[ :-padding ] )
+
+
+# A variant of Base32 that maintains the lexicographical ordering such that for any given list of
+# strings l, map( decode, sorted( map( standard.encode, l ) ) ) == sorted( l )
+
+standard = D32( '234567abcdefghijklmnopqrstuvwxyz' )
+
+# A reimplementation of base64.b32encode and base64.b32decode, but faster and without padding:
+
+base32 = D32( 'abcdefghijklmnopqrstuvwxyz234567' )
diff --git a/src/bd2k/util/d64.py b/src/bd2k/util/d64.py
new file mode 100644
index 0000000..c77d967
--- /dev/null
+++ b/src/bd2k/util/d64.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2014 Dominic Tarr
+# Copyright (c) 2015 Hannes Schmidt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+# and associated documentation files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or
+# substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Ported from JS found at https://github.com/dominictarr/d64
+
+
+
+class D64( object ):
+    def __init__( self, special_chars ):
+        super( D64, self ).__init__( )
+        alphabet = 'PYFGCRLAOEUIDHTNSQJKXBMWVZpyfgcrlaoeuidhtnsqjkxbmwvz1234567890'
+        self.alphabet = bytearray( sorted( alphabet + special_chars ) )
+        self.lookup = bytearray( 255 )
+        for i in xrange( 64 ):
+            code = self.alphabet[ i ]
+            self.lookup[ code ] = i
+
+    def encode( self, data ):
+        """
+        >>> encode = standard.encode
+        >>> encode('')
+        ''
+        >>> encode('\\x00')
+        '..'
+        >>> encode('\\x00\\x01')
+        '..3'
+        >>> encode('\\x00\\x01\\x02')
+        '..31'
+        >>> encode('\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07')
+        '..31.kF40VR'
+        """
+        l = len( data )
+        s = bytearray( (l * 4 + 2) / 3 )
+        hang = 0
+        j = 0
+        a = self.alphabet
+        for i in xrange( l ):
+            v = ord( data[ i ] )
+            r = i % 3
+            if r == 0:
+                s[ j ] = a[ v >> 2 ]
+                j += 1
+                hang = (v & 3) << 4
+            elif r == 1:
+                s[ j ] = a[ hang | v >> 4 ]
+                j += 1
+                hang = (v & 0xf) << 2
+            elif r == 2:
+                s[ j ] = a[ hang | v >> 6 ]
+                j += 1
+                s[ j ] = a[ v & 0x3f ]
+                j += 1
+                hang = 0
+            else:
+                assert False
+        if l % 3:
+            s[ j ] = a[ hang ]
+
+        return str( s )
+
+    def decode( self, e ):
+        """
+        >>> decode = standard.decode
+        >>> decode('')
+        ''
+        >>> decode('..')
+        '\\x00'
+        >>> decode('..3')
+        '\\x00\\x01'
+        >>> decode('..31')
+        '\\x00\\x01\\x02'
+        >>> decode('..31.kF40VR')
+        '\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07'
+        """
+        n = len( e )
+        j = 0
+        b = bytearray( n * 3 / 4 )
+        hang = 0
+        l = self.lookup
+
+        for i in xrange( n ):
+            v = l[ ord( e[ i ] ) ]
+            r = i % 4
+            if r == 0:
+                hang = v << 2
+            elif r == 1:
+                b[ j ] = hang | v >> 4
+                j += 1
+                hang = (v << 4) & 0xFF
+            elif r == 2:
+                b[ j ] = hang | v >> 2
+                j += 1
+                hang = (v << 6) & 0xFF
+            elif r == 3:
+                b[ j ] = hang | v
+                j += 1
+            else:
+                assert False
+        return str( b )
+
+
+standard = D64( '._' )
diff --git a/src/bd2k/util/ec2/__init__.py b/src/bd2k/util/ec2/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/bd2k/util/ec2/credentials.py b/src/bd2k/util/ec2/credentials.py
new file mode 100644
index 0000000..58b0a5d
--- /dev/null
+++ b/src/bd2k/util/ec2/credentials.py
@@ -0,0 +1,155 @@
+import errno
+import logging
+import threading
+import time
+from datetime import datetime
+
+import os
+from bd2k.util.files import mkdir_p
+
+log = logging.getLogger( __name__ )
+
+cache_path = '~/.cache/aws/cached_temporary_credentials'
+
+datetime_format = "%Y-%m-%dT%H:%M:%SZ"  # incidentally the same as the format used by AWS
+
+
+def datetime_to_str( dt ):
+    """
+    Convert a naive (implicitly UTC) datetime object into a string, explicitly UTC.
+
+    >>> datetime_to_str( datetime( 1970, 1, 1, 0, 0, 0 ) )
+    '1970-01-01T00:00:00Z'
+    """
+    return dt.strftime( datetime_format )
+
+
+def str_to_datetime( s ):
+    """
+    Convert a string, explicitly UTC into a naive (implicitly UTC) datetime object.
+
+    >>> str_to_datetime( '1970-01-01T00:00:00Z' )
+    datetime.datetime(1970, 1, 1, 0, 0)
+
+    Just to show that the constructor args for seconds and microseconds are optional:
+    >>> datetime(1970, 1, 1, 0, 0, 0)
+    datetime.datetime(1970, 1, 1, 0, 0)
+    """
+    return datetime.strptime( s, datetime_format )
+
+
+monkey_patch_lock = threading.RLock( )
+_populate_keys_from_metadata_server_orig = None
+
+
+def enable_metadata_credential_caching( ):
+    """
+    Monkey-patches Boto to allow multiple processes using it to share one set of cached, temporary
+    IAM role credentials. This helps avoid hitting request limits imposed on the metadata service
+    when too many processes concurrently request those credentials. Function is idempotent.
+
+    This function should be called before any AWS connections attempts are made with Boto.
+    """
+    global _populate_keys_from_metadata_server_orig
+    with monkey_patch_lock:
+        if _populate_keys_from_metadata_server_orig is None:
+            from boto.provider import Provider
+            _populate_keys_from_metadata_server_orig = Provider._populate_keys_from_metadata_server
+            Provider._populate_keys_from_metadata_server = _populate_keys_from_metadata_server
+
+
+def disable_metadata_credential_caching( ):
+    """
+    Reverse the effect of enable_metadata_credential_caching()
+    """
+    global _populate_keys_from_metadata_server_orig
+    with monkey_patch_lock:
+        if _populate_keys_from_metadata_server_orig is not None:
+            from boto.provider import Provider
+            Provider._populate_keys_from_metadata_server = _populate_keys_from_metadata_server_orig
+            _populate_keys_from_metadata_server_orig = None
+
+
+def _populate_keys_from_metadata_server( self ):
+    global _populate_keys_from_metadata_server_orig
+    path = os.path.expanduser( cache_path )
+    tmp_path = path + '.tmp'
+    while True:
+        log.debug( 'Attempting to read cached credentials from %s.', path )
+        try:
+            with open( path, 'r' ) as f:
+                content = f.read( )
+                if content:
+                    record = content.split( '\n' )
+                    assert len(record) == 4
+                    self._access_key = record[ 0 ]
+                    self._secret_key = record[ 1 ]
+                    self._security_token = record[ 2 ]
+                    self._credential_expiry_time = str_to_datetime( record[ 3 ] )
+                else:
+                    log.debug( '%s is empty. Credentials are not temporary.', path )
+                    return
+        except IOError as e:
+            if e.errno == errno.ENOENT:
+                log.debug( 'Cached credentials are missing.' )
+                dir_path = os.path.dirname( path )
+                if not os.path.exists( dir_path ):
+                    log.debug( 'Creating parent directory %s', dir_path )
+                    # A race would be ok at this point
+                    mkdir_p( dir_path )
+            else:
+                raise
+        else:
+            if self._credentials_need_refresh( ):
+                log.debug( 'Cached credentials are expired.' )
+            else:
+                log.debug( 'Cached credentials exist and are still fresh.' )
+                return
+        # We get here if credentials are missing or expired
+        log.debug( 'Racing to create %s.', tmp_path )
+        # Only one process, the winner, will succeed
+        try:
+            fd = os.open( tmp_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0600 )
+        except OSError as e:
+            if e.errno == errno.EEXIST:
+                log.debug( 'Lost the race to create %s. Waiting on winner to remove it.', tmp_path )
+                while os.path.exists( tmp_path ):
+                    time.sleep( .1 )
+                log.debug( 'Winner removed %s. Trying from the top.', tmp_path )
+            else:
+                raise
+        else:
+            try:
+                log.debug( 'Won the race to create %s. '
+                           'Requesting credentials from metadata service.', tmp_path )
+                _populate_keys_from_metadata_server_orig( self )
+            except:
+                os.close( fd )
+                fd = None
+                log.debug( 'Failed to obtain credentials, removing %s.', tmp_path )
+                # This unblocks the losers.
+                os.unlink( tmp_path )
+                # Bail out. It's too likely to happen repeatedly
+                raise
+            else:
+                if self._credential_expiry_time is None:
+                    os.close( fd )
+                    fd = None
+                    log.debug( 'Credentials are not temporary. '
+                               'Leaving %s empty and renaming it to %s.', tmp_path, path )
+                else:
+                    log.debug( 'Writing credentials to %s.', tmp_path )
+                    with os.fdopen( fd, 'w' ) as fh:
+                        fd = None
+                        fh.write( '\n'.join( [
+                            self._access_key,
+                            self._secret_key,
+                            self._security_token,
+                            datetime_to_str( self._credential_expiry_time ) ] ) )
+                    log.debug( 'Wrote credentials to %s. '
+                               'Renaming it to %s.', tmp_path, path )
+                os.rename( tmp_path, path )
+                return
+            finally:
+                if fd is not None:
+                    os.close( fd )
diff --git a/src/bd2k/util/ec2/test/__init__.py b/src/bd2k/util/ec2/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/bd2k/util/ec2/test/test_credentials.py b/src/bd2k/util/ec2/test/test_credentials.py
new file mode 100644
index 0000000..1fa713f
--- /dev/null
+++ b/src/bd2k/util/ec2/test/test_credentials.py
@@ -0,0 +1,83 @@
+import logging
+
+import errno
+
+import os
+import unittest
+
+from bd2k.util.ec2.credentials import (enable_metadata_credential_caching,
+                                       disable_metadata_credential_caching, cache_path)
+
+
+def get_access_key( ):
+    """
+    Return the current AWS access key via boto's 'aws' provider, or None when the
+    credentials are not temporary (no expiry time), i.e. when there is nothing to cache.
+    """
+    from boto.provider import Provider
+    provider = Provider( 'aws' )
+    return None if provider._credential_expiry_time is None else provider.get_access_key( ) 
+
+
+class CredentialsTest( unittest.TestCase ):
+    def __init__( self, *args, **kwargs ):
+        super( CredentialsTest, self ).__init__( *args, **kwargs )
+        # Resolve '~' once so every test method sees the same absolute cache location.
+        self.cache_path = os.path.expanduser( cache_path )
+
+    @classmethod
+    def setUpClass( cls ):
+        super( CredentialsTest, cls ).setUpClass( )
+        logging.basicConfig( level=logging.DEBUG )
+
+    def setUp( self ):
+        super( CredentialsTest, self ).setUp( )
+        self.cleanUp( )
+
+    def cleanUp( self ):
+        """Remove the on-disk credential cache, tolerating its absence."""
+        try:
+            os.unlink( self.cache_path )
+        except OSError as e:
+            if e.errno == errno.ENOENT:
+                pass
+            else:
+                raise
+
+    def tearDown( self ):
+        super( CredentialsTest, self ).tearDown( )
+        self.cleanUp( )
+
+    def test_metadata_credential_caching( self ):
+        """
+        Brute forces many concurrent requests for getting temporary credentials. If you comment
+        out the calls to enable_metadata_credential_caching, you should see some failures due to
+        requests timing out. The test will also take much longer in that case.
+        """
+        num_tests = 1000
+        num_processes = 32
+        # Get key without caching
+        access_key = get_access_key( )
+        self.assertFalse( os.path.exists( self.cache_path ) )
+        enable_metadata_credential_caching( )
+        # Again for idempotence
+        enable_metadata_credential_caching( )
+        try:
+            futures = [ ]
+            from multiprocessing import Pool
+            pool = Pool( num_processes )
+            try:
+                for i in range( num_tests ):
+                    futures.append( pool.apply_async( get_access_key ) )
+            except:
+                pool.close( )
+                pool.terminate( )
+                raise
+            else:
+                pool.close( )
+                pool.join( )
+        finally:
+            disable_metadata_credential_caching( )
+            # Again for idempotence
+            disable_metadata_credential_caching( )
+        # The cache file exists iff the credentials were temporary (see get_access_key).
+        self.assertEquals( access_key is not None, os.path.exists( self.cache_path ) )
+        self.assertEquals( len( futures ), num_tests )
+        access_keys = [ f.get( ) for f in futures ]
+        self.assertEquals( len( access_keys ), num_tests )
+        # Every concurrent request must have observed the same, single key.
+        access_keys = set( access_keys )
+        self.assertEquals( len( access_keys ), 1 )
+        self.assertEquals( access_keys.pop( ), access_key )
diff --git a/src/bd2k/util/exceptions.py b/src/bd2k/util/exceptions.py
new file mode 100644
index 0000000..1a8885a
--- /dev/null
+++ b/src/bd2k/util/exceptions.py
@@ -0,0 +1,81 @@
+from contextlib import contextmanager
+import sys
+
+
+class panic( object ):
+    """
+    The Python idiom for reraising a primary exception fails when the except block raises a
+    secondary exception, e.g. while trying to cleanup. In that case the original exception is
+    lost and the secondary exception is reraised. The solution seems to be to save the primary
+    exception info as returned from sys.exc_info() and then reraise that.
+
+    This is a contextmanager that should be used like this
+
+    try:
+         # do something that can fail
+    except:
+        with panic( log ):
+            # do cleanup that can also fail
+
+    If a logging logger is passed to panic(), any secondary Exception raised within the with
+    block will be logged. Otherwise those exceptions are swallowed. At the end of the with block
+    the primary exception will be reraised.
+    """
+
+    def __init__( self, log=None ):
+        super( panic, self ).__init__( )
+        self.log = log
+        self.exc_info = None
+
+    def __enter__( self ):
+        # Capture the primary exception that is active when the with block is entered.
+        self.exc_info = sys.exc_info( )
+
+    def __exit__( self, *exc_info ):
+        # exc_info describes the secondary exception (if any) raised inside the with block.
+        if self.log is not None and exc_info and exc_info[ 0 ]:
+            self.log.warn( "Exception during panic", exc_info=exc_info )
+        exc_type, exc_value, traceback = self.exc_info
+        # Python 2 three-argument raise: reraise the primary exception with its original traceback.
+        raise exc_type, exc_value, traceback
+
+
+class RequirementError( Exception ):
+    """
+    The exception raised by require(). Where AssertionError is raised when there is likely an
+    internal problem within the code base, i.e. a bug, an instance of this class is raised when
+    the cause lies outside the code base, e.g. with the user or caller.
+    """
+    pass
+
+
+def require( value, message, *message_args ):
+    """
+    Raise RequirementError with the given message if the given value is considered false. See
+    https://docs.python.org/2/library/stdtypes.html#truth-value-testing for a definition of which
+    values are false. This function is commonly used for validating user input. It is meant to be
+    complementary to assert. See RequirementError for more on that.
+
+    :param Any value: the value to be tested
+    :param message: the message for the exception, optionally a % format string
+    :param message_args: optional values for % formatting the given message
+    :return: None
+
+    >>> require(1 + 1 == 2, 'You made a terrible mistake')
+
+    >>> require(1 + 1 == 3, 'You made a terrible mistake')
+    Traceback (most recent call last):
+    ...
+    RequirementError: You made a terrible mistake
+
+    >>> require(1 + 1 == 3, 'You made a terrible mistake, %s', 'you fool')
+    Traceback (most recent call last):
+    ...
+    RequirementError: You made a terrible mistake, you fool
+
+    >>> require(1 + 1 == 3, 'You made a terrible mistake, %s %s', 'your', 'majesty')
+    Traceback (most recent call last):
+    ...
+    RequirementError: You made a terrible mistake, your majesty
+    """
+    if not value:
+        # Only % -format when args were given, so literal '%' in plain messages is safe.
+        if message_args:
+            message = message % message_args
+        raise RequirementError( message)
diff --git a/src/bd2k/util/expando.py b/src/bd2k/util/expando.py
new file mode 100644
index 0000000..f741f2a
--- /dev/null
+++ b/src/bd2k/util/expando.py
@@ -0,0 +1,117 @@
+class Expando(dict):
+    """
+    Pass initial attributes to the constructor:
+
+    >>> o = Expando(foo=42)
+    >>> o.foo
+    42
+
+    Dynamically create new attributes:
+
+    >>> o.bar = 'hi'
+    >>> o.bar
+    'hi'
+
+    Expando is a dictionary:
+
+    >>> isinstance(o,dict)
+    True
+    >>> o['foo']
+    42
+
+    Works great with JSON:
+
+    >>> import json
+    >>> s='{"foo":42}'
+    >>> o = json.loads(s,object_hook=Expando)
+    >>> o
+    {u'foo': 42}
+    >>> o.foo
+    42
+    >>> o.bar = 'hi'
+    >>> o
+    {u'foo': 42, 'bar': 'hi'}
+
+    And since Expando is a dict, it serializes back to JSON just fine:
+
+    >>> json.dumps(o)
+    '{"foo": 42, "bar": "hi"}'
+
+    Attributes can be deleted, too:
+
+    >>> o = Expando(foo=42)
+    >>> o.foo
+    42
+    >>> del o.foo
+    >>> o.foo
+    Traceback (most recent call last):
+    ...
+    AttributeError: 'Expando' object has no attribute 'foo'
+    >>> o['foo']
+    Traceback (most recent call last):
+    ...
+    KeyError: 'foo'
+
+    >>> del o.foo
+    Traceback (most recent call last):
+    ...
+    AttributeError: foo
+
+    And copied:
+
+    >>> o = Expando(foo=42)
+    >>> p = o.copy()
+    >>> isinstance(p,Expando)
+    True
+    >>> o == p
+    True
+    >>> o is p
+    False
+
+    Same with MagicExpando ...
+
+    >>> o = MagicExpando()
+    >>> o.foo.bar = 42
+    >>> p = o.copy()
+    >>> isinstance(p,MagicExpando)
+    True
+    >>> o == p
+    True
+    >>> o is p
+    False
+
+    ... but the copy is shallow:
+
+    >>> o.foo is p.foo
+    True
+    """
+
+    def __init__( self, *args, **kwargs ):
+        super( Expando, self ).__init__( *args, **kwargs )
+        # NOTE(review): presumably neutralizes any __slots__ declared by subclasses — confirm.
+        self.__slots__ = None
+        # Aliasing the instance dict to the dict itself is what makes attribute
+        # access and item access interchangeable.
+        self.__dict__ = self
+
+    def copy(self):
+        # Uses type(self) so subclasses (e.g. MagicExpando) copy to their own type.
+        return type(self)(self)
+
+class MagicExpando(Expando):
+    """
+    Use MagicExpando for chained attribute access. The first time a missing attribute is
+    accessed, it will be set to a new child MagicExpando.
+
+    >>> o=MagicExpando()
+    >>> o.foo = 42
+    >>> o
+    {'foo': 42}
+    >>> o.bar.hello = 'hi'
+    >>> o
+    {'foo': 42, 'bar': {'hello': 'hi'}}
+    """
+    def __getattribute__( self, name ):
+        # NOTE(review): super( Expando, self ) starts the lookup above Expando in the
+        # MRO — presumably deliberate; confirm it shouldn't be super( MagicExpando, self ).
+        try:
+            return super( Expando, self ).__getattribute__( name )
+        except AttributeError:
+            # Auto-vivify: a missing attribute becomes (and is stored as) an empty child.
+            child = self.__class__( )
+            self[name] = child
+            return child
+
diff --git a/src/bd2k/util/files.py b/src/bd2k/util/files.py
new file mode 100644
index 0000000..da1ffd3
--- /dev/null
+++ b/src/bd2k/util/files.py
@@ -0,0 +1,106 @@
+import errno
+import os
+
+
+def mkdir_p( path ):
+    """
+    The equivalent of mkdir -p: create the directory and any missing parents,
+    ignoring the error raised when the leaf directory already exists.
+    """
+    try:
+        os.makedirs( path )
+    except OSError as exc:
+        # EEXIST alone is not enough: the pre-existing path might be a regular file.
+        if exc.errno == errno.EEXIST and os.path.isdir( path ):
+            pass
+        else:
+            raise
+
+
+def rm_f( path ):
+    """
+    Remove the file at the given path with os.remove(), ignoring errors caused by the file's absence.
+    """
+    try:
+        os.remove( path )
+    except OSError as e:
+        # ENOENT means the file was already gone — exactly the desired end state.
+        if e.errno == errno.ENOENT:
+            pass
+        else:
+            raise
+
+
+def copyfileobj( src, dst, limit=None, bufsize=1024 * 1024 ):
+    """
+    Copy the contents of one file object to another file object. If limit is given, stop after at
+    most limit bytes were copied. The copying will begin at the current file pointer of each file
+    object.
+
+    :param src: the file object to copy from
+
+    :param dst: the file object to copy to
+
+    :param limit: the maximum number of bytes to copy or None if all remaining bytes in src
+           should be copied
+
+    :param bufsize: the size of the intermediate copy buffer. No more than that many bytes will
+           ever be read from src or written to dst at any one time.
+
+    :return: None if limit is None, otherwise the difference between limit and the number of
+    bytes actually copied. This will be > 0 if and only if the source file hit EOF before limit
+    number of bytes could be read.
+
+    NOTE(review): if limit == 0 the loop is never entered and the function implicitly
+    returns None rather than 0 — confirm whether that is intended.
+
+    >>> import tempfile
+    >>> with open('/dev/urandom') as f1:
+    ...     with tempfile.TemporaryFile() as f2:
+    ...         copyfileobj(f1,f2,limit=100)
+    ...         f2.seek(60)
+    ...         with tempfile.TemporaryFile() as f3:
+    ...             copyfileobj(f2,f3), f2.tell(), f3.tell()
+    (None, 100, 40)
+    """
+    while limit is None or limit > 0:
+        # Never request more than the remaining limit in one read.
+        buf = src.read( bufsize if limit is None or bufsize < limit else limit )
+        if buf:
+            if limit is not None:
+                limit -= len( buf )
+                assert limit >= 0
+            dst.write( buf )
+        else:
+            # EOF on src: return the shortfall (or None when unlimited).
+            return limit
+
+
+if False:
+    # These are not needed for Python 2.7 as Python's builtin file object's read() and write()
+    # method are greedy. For Python 3.x these may be useful.
+
+    def gread( readable, n ):
+        """
+        Greedy read. Read until readable is exhausted, an error occurs or the given number of bytes
+        have been read. It returns fewer than the requested number of bytes if and only if the end of
+        file has been reached.
+
+        :type readable: io.FileIO
+        """
+        bufs = [ ]
+        i = 0
+        while i < n:
+            buf = readable.read( n - i )
+            m = len( buf )
+            if m == 0:
+                # EOF before n bytes were accumulated.
+                break
+            bufs.append( buf )
+            i += m
+        return ''.join( bufs )
+
+
+    def gwrite( writable, buf ):
+        """
+        Greedy write. Write until the entire buffer has been written or an error occurs.
+
+        :type writable: io.FileIO[str|bytearray]
+
+        :type buf: str|bytearray
+        """
+        n = len( buf )
+        i = 0
+        while i < n:
+            i += writable.write( buf[ i: ] )
diff --git a/src/bd2k/util/fnmatch.py b/src/bd2k/util/fnmatch.py
new file mode 100644
index 0000000..f6d335e
--- /dev/null
+++ b/src/bd2k/util/fnmatch.py
@@ -0,0 +1,149 @@
+# Same as Python's fnmatch with the following differences:
+# - '/' doesn't match '*'
+# - added '**' to match anything
+# - added some unit tests
+
+"""Filename matching with shell patterns.
+
+fnmatch(FILENAME, PATTERN) matches according to the local convention.
+fnmatchcase(FILENAME, PATTERN) always takes case in account.
+
+The functions operate by translating the pattern into a regular
+expression.  They cache the compiled regular expressions for speed.
+
+The function translate(PATTERN) returns a regular expression
+corresponding to PATTERN.  (It does not compile it.)
+"""
+
+import re
+
+__all__ = [ "filter", "fnmatch", "fnmatchcase", "translate" ]
+
+_cache = { }
+_MAXCACHE = 100
+
+
+def _purge( ):
+    """Clear the compiled-pattern cache shared by fnmatch(), fnmatchcase() and filter()."""
+    _cache.clear( )
+
+
+def fnmatch( name, pat ):
+    """Test whether FILENAME matches PATTERN.
+
+    Patterns are Unix shell style:
+
+    *       matches everything except '/'
+    **      matches everything, including '/'
+    ?       matches any single character
+    [seq]   matches any character in seq
+    [!seq]  matches any char not in seq
+
+    An initial period in FILENAME is not special.
+    Both FILENAME and PATTERN are first case-normalized
+    if the operating system requires it.
+    If you don't want this, use fnmatchcase(FILENAME, PATTERN).
+
+    >>> fnmatch('bar', '*' )
+    True
+    >>> fnmatch('foo/bar', '*' )
+    False
+    >>> fnmatch('foo/bar', '**' )
+    True
+    >>> fnmatch('foo/bar', '*/*' )
+    True
+    >>> fnmatch('foo/bar', '**/*' )
+    True
+    >>> fnmatch('/bar', '**/*' )
+    True
+    >>> fnmatch('/', '**' )
+    True
+    >>> fnmatch('/', '*' )
+    False
+    """
+
+    import os
+
+    # Case-normalize both sides per the host OS, then defer to the exact matcher.
+    name = os.path.normcase( name )
+    pat = os.path.normcase( pat )
+    return fnmatchcase( name, pat )
+
+
+def filter( names, pat ):
+    """Return the subset of the list NAMES that match PAT"""
+    # Note: shadows the builtin filter(), just like the stdlib fnmatch module does.
+    import os, posixpath
+
+    result = [ ]
+    pat = os.path.normcase( pat )
+    # Compile and cache the translated pattern; the cache is bounded by wholesale clearing.
+    if not pat in _cache:
+        res = translate( pat )
+        if len( _cache ) >= _MAXCACHE:
+            _cache.clear( )
+        _cache[ pat ] = re.compile( res )
+    match = _cache[ pat ].match
+    if os.path is posixpath:
+        # normcase on posix is NOP. Optimize it away from the loop.
+        for name in names:
+            if match( name ):
+                result.append( name )
+    else:
+        for name in names:
+            if match( os.path.normcase( name ) ):
+                result.append( name )
+    return result
+
+
+def fnmatchcase( name, pat ):
+    """Test whether FILENAME matches PATTERN, including case.
+
+    This is a version of fnmatch() which doesn't case-normalize
+    its arguments.
+    """
+
+    if not pat in _cache:
+        res = translate( pat )
+        # Crude cache bound: clear everything once _MAXCACHE entries accumulate.
+        if len( _cache ) >= _MAXCACHE:
+            _cache.clear( )
+        _cache[ pat ] = re.compile( res )
+    return _cache[ pat ].match( name ) is not None
+
+
+def translate( pat ):
+    """Translate a shell PATTERN to a regular expression.
+
+    There is no way to quote meta-characters.
+    """
+
+    i, n = 0, len( pat )
+    res = ''
+    while i < n:
+        c = pat[ i ]
+        i += 1
+        if c == '*':
+            if i < len(pat) and pat[i] == '*':
+                i += 1
+                # '**' matches anything, including '/'
+                res += '.*'
+            else:
+                # a single '*' does not cross '/' boundaries
+                res += '[^/]*'
+        elif c == '?':
+            res += '.'
+        elif c == '[':
+            j = i
+            # A leading '!' (negation) or ']' is part of the set, not a terminator.
+            if j < n and pat[ j ] == '!':
+                j += 1
+            if j < n and pat[ j ] == ']':
+                j += 1
+            while j < n and pat[ j ] != ']':
+                j += 1
+            if j >= n:
+                # Unterminated set: treat the '[' as a literal character.
+                res += '\\['
+            else:
+                stuff = pat[ i:j ].replace( '\\', '\\\\' )
+                i = j + 1
+                if stuff[ 0 ] == '!':
+                    stuff = '^' + stuff[ 1: ]
+                elif stuff[ 0 ] == '^':
+                    stuff = '\\' + stuff
+                res = '%s[%s]' % (res, stuff)
+        else:
+            res = res + re.escape( c )
+    # \Z anchors at end of string; Python 2 permits the (?ms) inline flags at the end.
+    return res + '\Z(?ms)'
diff --git a/src/bd2k/util/hashes.py b/src/bd2k/util/hashes.py
new file mode 100644
index 0000000..29e6aae
--- /dev/null
+++ b/src/bd2k/util/hashes.py
@@ -0,0 +1,132 @@
+def hash_json( hash_obj, value ):
+    """
+    Compute the hash of a parsed JSON value using the given hash object. This function does not
+    hash the JSON value, it hashes the object tree that is the result of parsing a string in JSON
+    format. Hashables (JSON objects) are hashed entry by entry in order of the lexicographical
+    ordering on the keys. Iterables are hashed in their inherent order.
+
+    If value or any of its children is an iterable with non-deterministic ordering of its
+    elements, e.g. a set, this method will yield non-deterministic results.
+
+    :param hash_obj: one of the Hash objects in hashlib, or any other object that has an update(s)
+           method accepting a single string.
+
+    :type value: int|str|float|Iterable[type(obj)]|Hashable[str,type(obj)]
+    :param value: The value to be hashed
+
+    >>> import hashlib
+    >>> def actual(x): h = hashlib.md5(); hash_json(h,x); return h.hexdigest()
+    >>> def expect(s): h = hashlib.md5(); h.update(s); return h.hexdigest()
+
+    >>> actual(0) == expect('0')
+    True
+    >>> actual(0.0) == expect('0.0')
+    True
+    >>> actual(0.1) == expect('0.1')
+    True
+    >>> actual(True) == expect('true')
+    True
+    >>> actual(False) == expect('false')
+    True
+    >>> actual("") == expect('""')
+    True
+    >>> actual([]) == expect('[]')
+    True
+    >>> actual([0]) == expect('[0]')
+    True
+    >>> actual([0,1]) == expect('[0,1]')
+    True
+    >>> actual({}) == expect('{}')
+    True
+    >>> actual({'':0}) == expect('{:0}')
+    True
+    >>> actual({'0':0}) == expect('{0:0}')
+    True
+    >>> actual({'0':0,'1':1}) == expect('{0:0,1:1}')
+    True
+    >>> actual({'':[]}) == expect('{:[]}')
+    True
+    >>> actual([{}]) == expect('[{}]')
+    True
+    >>> actual({0:0})
+    Traceback (most recent call last):
+    ...
+    ValueError: Dictionary keys must be strings, not <type 'int'>
+    >>> actual(object())
+    Traceback (most recent call last):
+    ...
+    ValueError: Type <type 'object'> is not supported
+    """
+    # EAFP: dict-like values (JSON objects) expose iteritems() in Python 2.
+    try:
+        items = value.iteritems( )
+    except AttributeError:
+        # Must check for string before testing iterability since strings are iterable
+        if isinstance( value, basestring ):
+            _hash_string( hash_obj, value )
+        else:
+            try:
+                iterator = iter( value )
+            except TypeError:
+                # We must check for bool first since it is subclass of int (wrongly, IMHO)
+                if isinstance( value, bool ):
+                    _hash_bool( hash_obj, value )
+                elif isinstance( value, (int, float) ):
+                    _hash_number( hash_obj, value )
+                else:
+                    raise ValueError( 'Type %s is not supported' % type( value ) )
+            else:
+                _hash_iterable( hash_obj, iterator )
+    else:
+        _hash_hashable( hash_obj, items )
+
+
+def _hash_number( hash_obj, n ):
+    # Numbers are hashed as their str() representation, e.g. 0 -> '0', 0.1 -> '0.1'.
+    hash_obj.update( str( n ) )
+
+
+def _hash_bool( hash_obj, b ):
+    # Booleans are hashed as JSON's literals 'true' / 'false'.
+    hash_obj.update( 'true' if b else 'false' )
+
+
+def _hash_string( hash_obj, s ):
+    # Strings are hashed wrapped in double quotes; the content itself is not escaped.
+    hash_obj.update( '"' )
+    hash_obj.update( s )
+    hash_obj.update( '"' )
+
+
+def _hash_iterable( hash_obj, items ):
+    # Hash the items between '[' and ']', comma-separated, in iteration order.
+    hash_obj.update( '[' )
+    try:
+        item = next( items )
+        hash_json( hash_obj, item )
+        while True:
+            item = next( items )
+            hash_obj.update( ',' )
+            hash_json( hash_obj, item )
+    except StopIteration:
+        pass
+    hash_obj.update( ']' )
+
+
+def _hash_hashable( hash_obj, items ):
+    # Hash the entries between '{' and '}', comma-separated, in sorted key order
+    # so that equal dicts hash equally regardless of insertion order.
+    items = iter( sorted( items ) )
+    hash_obj.update( '{' )
+    try:
+        item = next( items )
+        _hash_hashable_item( hash_obj, item )
+        while True:
+            item = next( items )
+            hash_obj.update( ',' )
+            _hash_hashable_item( hash_obj, item )
+    except StopIteration:
+        pass
+    hash_obj.update( '}' )
+
+
+def _hash_hashable_item( hash_obj, (k, v) ):
+    # Hash one entry as key ':' value. Note: the (k, v) parameter uses
+    # Python-2-only tuple unpacking in the signature.
+    if isinstance( k, basestring ):
+        hash_obj.update( k )
+        hash_obj.update( ':' )
+        hash_json( hash_obj, v )
+    else:
+        raise ValueError( 'Dictionary keys must be strings, not %s' % type( k ) )
diff --git a/src/bd2k/util/humanize.py b/src/bd2k/util/humanize.py
new file mode 100644
index 0000000..32b8a3b
--- /dev/null
+++ b/src/bd2k/util/humanize.py
@@ -0,0 +1,131 @@
+# http://code.activestate.com/recipes/578019-bytes-to-human-human-to-bytes-converter/
+
+"""
+Bytes-to-human / human-to-bytes converter.
+Based on: http://goo.gl/kTQMs
+Working with Python 2.x and 3.x.
+
+Author: Giampaolo Rodola' <g.rodola [AT] gmail [DOT] com>
+License: MIT
+"""
+
+# see: http://goo.gl/kTQMs
+SYMBOLS = {
+    'customary'     : ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'),
+    'customary_ext' : ('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa',
+                       'zetta', 'iotta'),
+    'iec'           : ('Bi', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'),
+    'iec_ext'       : ('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi',
+                       'zebi', 'yobi'),
+}
+
+def bytes2human(n, fmt='%(value).1f %(symbol)s', symbols='customary'):
+    """
+    Convert n bytes into a human readable string based on format.
+    symbols can be either "customary", "customary_ext", "iec" or "iec_ext",
+    see: http://goo.gl/kTQMs
+
+      >>> bytes2human(0)
+      '0.0 '
+      >>> bytes2human(0.9)
+      '0.0 '
+      >>> bytes2human(1)
+      '1.0 '
+      >>> bytes2human(1.9)
+      '1.0 '
+      >>> bytes2human(1024)
+      '1.0 K'
+      >>> bytes2human(1048576)
+      '1.0 M'
+      >>> bytes2human(1099511627776127398123789121)
+      '909.5 Y'
+
+      >>> bytes2human(9856, symbols="customary")
+      '9.6 K'
+      >>> bytes2human(9856, symbols="customary_ext")
+      '9.6 kilo'
+      >>> bytes2human(9856, symbols="iec")
+      '9.6 Ki'
+      >>> bytes2human(9856, symbols="iec_ext")
+      '9.6 kibi'
+
+      >>> bytes2human(10000, "%(value).1f %(symbol)s/sec")
+      '9.8 K/sec'
+
+      >>> # precision can be adjusted by playing with %f operator
+      >>> bytes2human(10000, fmt="%(value).5f %(symbol)s")
+      '9.76562 K'
+    """
+    n = int(n)
+    if n < 0:
+        raise ValueError("n < 0")
+    symbols = SYMBOLS[symbols]
+    prefix = {}
+    for i, s in enumerate(symbols[1:]):
+        prefix[s] = 1 << (i+1)*10  # K -> 2**10, M -> 2**20, ...
+    # Walk from the largest unit down and use the first one that fits.
+    for symbol in reversed(symbols[1:]):
+        if n >= prefix[symbol]:
+            value = float(n) / prefix[symbol]
+            return fmt % locals()  # relies on the local names 'value' and 'symbol'
+    return fmt % dict(symbol=symbols[0], value=n)
+
+def human2bytes(s):
+    """
+    Attempts to guess the string format based on default symbols
+    set and return the corresponding bytes as an integer.
+    When unable to recognize the format ValueError is raised.
+
+      >>> human2bytes('0 ')
+      0
+      >>> human2bytes('1 K')
+      1024
+      >>> human2bytes('1K')
+      1024
+      >>> human2bytes('1.1K') == 1024 + 102
+      True
+      >>> human2bytes('1 M')
+      1048576
+      >>> human2bytes('1 Gi')
+      1073741824
+      >>> human2bytes('1 tera')
+      1099511627776
+
+      >>> human2bytes('0.5kilo')
+      512
+      >>> human2bytes('0.1  byte')
+      0
+      >>> human2bytes('1  byte')
+      1
+      >>> human2bytes('1 k')  # k is an alias for K
+      1024
+      >>> human2bytes('12 foo')
+      Traceback (most recent call last):
+          ...
+      ValueError: can't interpret '12 foo'
+    """
+    init = s
+    num = ""
+    # Consume the leading numeric part (digits and dots).
+    # NOTE(review): 'and' binds tighter than 'or' here; this only behaves because
+    # s[0:1] == '.' is False for an empty string — consider parenthesizing.
+    while s and s[0:1].isdigit() or s[0:1] == '.':
+        num += s[0]
+        s = s[1:]
+    num = float(num)
+    letter = s.strip()
+    for name, sset in SYMBOLS.items():
+        if letter in sset:
+            break
+    else:
+        # for/else: the suffix matched no symbol set
+        if letter == 'k':
+            # treat 'k' as an alias for 'K' as per: http://goo.gl/kTQMs
+            sset = SYMBOLS['customary']
+            letter = letter.upper()
+        else:
+            raise ValueError("can't interpret %r" % init)
+    prefix = {sset[0]:1}
+    for i, s in enumerate(sset[1:]):
+        prefix[s] = 1 << (i+1)*10
+    return int(num * prefix[letter])
+
+
+if __name__ == "__main__":
+    # Run this module's doctests when executed directly.
+    import doctest
+    doctest.testmod()
diff --git a/src/bd2k/util/iterables.py b/src/bd2k/util/iterables.py
new file mode 100644
index 0000000..9235753
--- /dev/null
+++ b/src/bd2k/util/iterables.py
@@ -0,0 +1,169 @@
+from itertools import takewhile, izip, izip_longest, dropwhile, imap, chain
+
+
+def common_prefix( xs, ys ):
+    """
+    Lazily yield the longest common prefix of the two iterables, element by element.
+
+    >>> list( common_prefix('','') )
+    []
+    >>> list( common_prefix('A','') )
+    []
+    >>> list( common_prefix('','A') )
+    []
+    >>> list( common_prefix('A','A') )
+    ['A']
+    >>> list( common_prefix('AB','A') )
+    ['A']
+    >>> list( common_prefix('A','AB') )
+    ['A']
+    >>> list( common_prefix('A','B') )
+    []
+    """
+    # Python 2 tuple-unpacking lambdas: pair up elements, keep while equal, emit the left one.
+    return imap( lambda (x, y): x, takewhile( lambda (a, b): a == b, izip( xs, ys ) ) )
+
+
+def disparate_suffix( xs, ys ):
+    """
+    Drop the common prefix of the two iterables and yield the remaining elements
+    as pairs, padding the shorter side with None.
+
+    >>> list( disparate_suffix('','') )
+    []
+    >>> list( disparate_suffix('A','') )
+    [('A', None)]
+    >>> list( disparate_suffix('','A') )
+    [(None, 'A')]
+    >>> list( disparate_suffix('A','A') )
+    []
+    >>> list( disparate_suffix('AB','A') )
+    [('B', None)]
+    >>> list( disparate_suffix('A','AB') )
+    [(None, 'B')]
+    >>> list( disparate_suffix('A','B') )
+    [('A', 'B')]
+    """
+    return dropwhile( lambda (a, b): a == b, izip_longest( xs, ys ) )
+
+
+def flatten( iterables ):
+    """Lazily chain the given iterables into a single flat iterable (one level deep)."""
+    return chain.from_iterable( iterables )
+
+
+# noinspection PyPep8Naming
+# noinspection PyPep8Naming
+class concat( object ):
+    """
+    A literal iterable that lets you combine sequence literals (lists, sets) with generators or list
+    comprehensions. Instead of
+
+    >>> [ -1 ] + [ x * 2 for x in range( 3 ) ] + [ -1 ]
+    [-1, 0, 2, 4, -1]
+
+    you can write
+
+    >>> list( concat( -1, ( x * 2 for x in range( 3 ) ), -1 ) )
+    [-1, 0, 2, 4, -1]
+
+    This is slightly shorter (not counting the list constructor) and does not involve array
+    construction or concatenation.
+
+    Note that concat() flattens (or chains) all iterable arguments into a single result iterable:
+
+    >>> list( concat( 1, xrange( 2, 4 ), 4 ) )
+    [1, 2, 3, 4]
+
+    It only does so one level deep. If you need to recursively flatten a data structure,
+    check out crush().
+
+    If you want to prevent that flattening for an iterable argument, wrap it in concat():
+
+    >>> list( concat( 1, concat( xrange( 2, 4 ) ), 4 ) )
+    [1, xrange(2, 4), 4]
+
+    Some more examples.
+
+    >>> list( concat() ) # empty concat
+    []
+    >>> list( concat( 1 ) ) # non-iterable
+    [1]
+    >>> list( concat( concat() ) ) # empty iterable
+    []
+    >>> list( concat( concat( 1 ) ) ) # singleton iterable
+    [1]
+    >>> list( concat( 1, concat( 2 ), 3 ) ) # flattened iterable
+    [1, 2, 3]
+    >>> list( concat( 1, [2], 3 ) ) # flattened iterable
+    [1, 2, 3]
+    >>> list( concat( 1, concat( [2] ), 3 ) ) # protecting an iterable from being flattened
+    [1, [2], 3]
+    >>> list( concat( 1, concat( [2], 3 ), 4 ) ) # protection only works with a single argument
+    [1, 2, 3, 4]
+    >>> list( concat( 1, 2, concat( 3, 4 ), 5, 6 ) )
+    [1, 2, 3, 4, 5, 6]
+    >>> list( concat( 1, 2, concat( [ 3, 4 ] ), 5, 6 ) )
+    [1, 2, [3, 4], 5, 6]
+
+    Note that while strings are technically iterable, concat() does not flatten them.
+
+    >>> list( concat( 'ab' ) )
+    ['ab']
+    >>> list( concat( concat( 'ab' ) ) )
+    ['ab']
+    """
+
+    def __init__( self, *args ):
+        super( concat, self ).__init__( )
+        self.args = args
+
+    def __iter__( self ):
+        def expand( x ):
+            if isinstance( x, concat ) and len( x.args ) == 1:
+                # A single-argument concat() protects that argument from being flattened.
+                i = x.args
+            else:
+                try:
+                    i = x.__iter__( )
+                except AttributeError:
+                    # Non-iterables (and Python 2 strings, which lack __iter__) pass through whole.
+                    i = x,
+            return i
+
+        return flatten( imap( expand, self.args ) )
+
+
+# noinspection PyPep8Naming
+# noinspection PyPep8Naming
+class crush( object ):
+    """
+    Recursively flatten nested iterables into a single iterable, leaving strings intact.
+
+    >>> list(crush([]))
+    []
+    >>> list(crush([[]]))
+    []
+    >>> list(crush([1]))
+    [1]
+    >>> list(crush([[1]]))
+    [1]
+    >>> list(crush([[[]]]))
+    []
+    >>> list(crush([1,(),['two'],([3, 4],),{5}]))
+    [1, 'two', 3, 4, 5]
+
+    >>> list(crush(1))
+    Traceback (most recent call last):
+    ...
+    TypeError: 'int' object is not iterable
+
+    >>> list(crush('123'))
+    ['1', '2', '3']
+
+    The above is a bit of an anomaly since strings occurring inside iterables are not broken up:
+
+    >>> list(crush(['123']))
+    ['123']
+    """
+
+    def __init__( self, iterables ):
+        super( crush, self ).__init__( )
+        self.iterables = iterables
+
+    def __iter__( self ):
+        def expand( x ):
+            try:
+                # Using __iter__() instead of iter() prevents breaking up of strings
+                return crush( x.__iter__( ) )
+            except AttributeError:
+                # Leaf value: wrap in a 1-tuple so flatten() emits it as-is.
+                return x,
+
+        return flatten( imap( expand, self.iterables ) )
diff --git a/src/bd2k/util/lockfile.py b/src/bd2k/util/lockfile.py
new file mode 100644
index 0000000..deebb9d
--- /dev/null
+++ b/src/bd2k/util/lockfile.py
@@ -0,0 +1,36 @@
+from __future__ import absolute_import
+
+import errno
+import logging as log
+import os
+
+try:
+    from lockfile.pidlockfile import PIDLockFile
+except:
+    # NOTE(review): bare except silently disables this module on *any* import failure,
+    # not just a missing 'lockfile' package — consider 'except ImportError'.
+    pass
+else:
+    class SmartPIDLockFile( PIDLockFile ):
+        """
+        A PID lock file that breaks the lock if the owning process doesn't exist
+        """
+
+        def process_alive( self, pid ):
+            """
+            Return True if a process with the given PID exists, False if it provably
+            doesn't (ESRCH), or None if its existence could not be determined.
+            """
+            try:
+                os.kill( pid, 0 )
+                # now we know the process exists
+                return True
+            except OSError as e:
+                if e.errno == errno.ESRCH:
+                    # now we know the process doesn't exist
+                    return False
+                else:
+                    # now we're not sure
+                    return None
+
+        def acquire( self, timeout=None ):
+            # Break the lock first if it is held by a process that is provably dead.
+            owner = self.read_pid( )
+            if owner is not None and owner != os.getpid( ) and self.process_alive( owner ) is False:
+                log.warn( "Breaking lock '%s' since owning process %i is dead."
+                          % (self.lock_file, owner) )
+                self.break_lock( )
+            PIDLockFile.acquire( self, timeout )
diff --git a/src/bd2k/util/logging.py b/src/bd2k/util/logging.py
new file mode 100644
index 0000000..93fec58
--- /dev/null
+++ b/src/bd2k/util/logging.py
@@ -0,0 +1,29 @@
+from __future__ import absolute_import
+
+import codecs
+import types
+import logging
+
+
+class Utf8SyslogFormatter( logging.Formatter ):
+    """
+    Works around http://bugs.python.org/issue14452 by patching each record's
+    getMessage() to return a byte string (ASCII, or BOM-prefixed UTF-8).
+    """
+
+    def format( self, record ):
+        origGetMessage = record.getMessage
+
+        def getMessage( _self ):
+            msg = origGetMessage( )
+            if isinstance( msg, unicode ):
+                try:
+                    # First check if we can represent the message as ASCII without losing
+                    # information. That way we can avoid writing the BOM unless absolutely necessary.
+                    msg = msg.encode( 'ascii' )
+                except UnicodeEncodeError:
+                    msg = codecs.BOM + msg.encode( 'utf8' )
+            return msg
+
+        # NOTE(review): the next line is a no-op — the bound method it creates is
+        # discarded; the line after it performs the actual binding. Consider deleting.
+        types.MethodType( getMessage, record, logging.LogRecord )
+        record.getMessage = types.MethodType( getMessage, record, logging.LogRecord )
+        return logging.Formatter.format( self, record )
diff --git a/src/bd2k/util/objects.py b/src/bd2k/util/objects.py
new file mode 100644
index 0000000..4f44e7d
--- /dev/null
+++ b/src/bd2k/util/objects.py
@@ -0,0 +1,217 @@
+from __future__ import absolute_import
+from bd2k.util import sync_memoize
+
+
+class abstractclassmethod( classmethod ):
+    """
+    This class defines a decorator that allows the decorated class to be both an abstract method
+    and a class method.
+
+    Shamelessly stolen from
+
+    http://stackoverflow.com/questions/11217878/python-2-7-combine-abc-abstractmethod-and-classmethod
+
+    >>> from abc import ABCMeta
+
+    >>> class DemoABC:
+    ...     __metaclass__ = ABCMeta
+    ...
+    ...     @abstractclassmethod
+    ...     def from_int(cls, n):
+    ...         return cls()
+
+    >>> class DemoConcrete(DemoABC):
+    ...     @classmethod
+    ...     def from_int(cls, n):
+    ...         return cls(2*n)
+    ...
+    ...     def __init__(self, n):
+    ...         print ('Initializing with %s' % n)
+
+    >>> d = DemoConcrete(5)  # Succeeds by calling a concrete __init__()
+    Initializing with 5
+
+    >>> d = DemoConcrete.from_int(5)  # Succeeds by calling a concrete from_int()
+    Initializing with 10
+
+    >>> DemoABC()  # Fails because from_int() is abstract
+    Traceback (most recent call last):
+    ...
+    TypeError: Can't instantiate abstract class DemoABC with abstract methods from_int
+
+    >>> DemoABC.from_int(5)  # Fails because from_int() is not implemented
+    Traceback (most recent call last):
+    ...
+    TypeError: Can't instantiate abstract class DemoABC with abstract methods from_int
+    """
+    __isabstractmethod__ = True
+
+    def __init__(self, callable):
+        callable.__isabstractmethod__ = True
+        super(abstractclassmethod, self).__init__(callable)
+
+
+class abstractstaticmethod( staticmethod ):
+    """
+    This class defines a decorator that allows the decorated class to be both an abstract method
+    and a static method.
+
+    Based on code found at
+
+    http://stackoverflow.com/questions/11217878/python-2-7-combine-abc-abstractmethod-and-classmethod
+
+    >>> from abc import ABCMeta
+
+    >>> class DemoABC:
+    ...     __metaclass__ = ABCMeta
+    ...
+    ...     @abstractstaticmethod
+    ...     def f(n):
+    ...         raise NotImplementedError()
+
+    >>> class DemoConcrete(DemoABC):
+    ...     @staticmethod
+    ...     def f(n):
+    ...         return (2*n)
+
+    >>> d = DemoABC.f(5)  # Fails because f() is not implemented
+    Traceback (most recent call last):
+    ...
+    NotImplementedError
+
+    >>> DemoConcrete.f(5)  # Succeeds by calling a concrete f()
+    10
+    """
+    __isabstractmethod__ = True
+
+    def __init__(self, callable):
+        callable.__isabstractmethod__ = True
+        super(abstractstaticmethod, self).__init__(callable)
+
+
+class InnerClass( object ):
+    """
+    Note that this is EXPERIMENTAL code.
+
+    A nested class (the inner class) decorated with this will have an additional attribute called
+    'outer' referencing the instance of the nesting class (the outer class) that was used to
+    create the inner class. The outer instance does not need to be passed to the inner class's
+    constructor, it will be set magically. Shamelessly stolen from
+
+    http://stackoverflow.com/questions/2278426/inner-classes-how-can-i-get-the-outer-class-object-at-construction-time#answer-2278595.
+
+    with names made more descriptive (I hope) and added caching of the BoundInner classes.
+
+    Caveat: Within the inner class, self.__class__ will not be the inner class but a dynamically
+    created subclass thereof. Its name will be the same as that of the inner class,
+    but its __module__ will be different. There will be one such dynamic subclass per inner class
+    and instance of outer class, if that outer class instance created any instances of the inner
+    class.
+
+    >>> class Outer(object):
+    ...     def new_inner(self):
+    ...         # self is an instance of the outer class
+    ...         inner = self.Inner()
+    ...         # the inner instance's 'outer' attribute is set to the outer instance
+    ...         assert inner.outer is self
+    ...         return inner
+    ...     @InnerClass
+    ...     class Inner(object):
+    ...         def get_outer(self):
+    ...             return self.outer
+    ...         @classmethod
+    ...         def new_inner(cls):
+    ...             return cls()
+    >>> o = Outer()
+    >>> i = o.new_inner()
+    >>> i # doctest: +ELLIPSIS
+    <bd2k.util.objects.Inner object at ...> bound to <bd2k.util.objects.Outer object at ...>
+
+    >>> i.get_outer() # doctest: +ELLIPSIS
+    <bd2k.util.objects.Outer object at ...>
+
+    Now with inheritance for both inner and outer:
+
+    >>> class DerivedOuter(Outer):
+    ...     def new_inner(self):
+    ...         return self.DerivedInner()
+    ...     @InnerClass
+    ...     class DerivedInner(Outer.Inner):
+    ...         def get_outer(self):
+    ...             assert super( DerivedOuter.DerivedInner, self ).get_outer() == self.outer
+    ...             return self.outer
+    >>> derived_outer = DerivedOuter()
+    >>> derived_inner = derived_outer.new_inner()
+    >>> derived_inner # doctest: +ELLIPSIS
+    <bd2k.util.objects.DerivedInner object at ...> bound to <bd2k.util.objects.DerivedOuter object at ...>
+
+    >>> derived_inner.get_outer() # doctest: +ELLIPSIS
+    <bd2k.util.objects.DerivedOuter object at ...>
+
+    Test static references:
+    >>> Outer.Inner
+    <class 'bd2k.util.objects.Inner'>
+    >>> DerivedOuter.Inner
+    <class 'bd2k.util.objects.Inner'>
+    >>> DerivedOuter.DerivedInner
+    <class 'bd2k.util.objects.DerivedInner'>
+
+    Can't decorate top-level classes. Unfortunately, this is detected when the instance is
+    created, not when the class is defined.
+    >>> @InnerClass
+    ... class Foo(object):
+    ...    pass
+    >>> Foo()
+    Traceback (most recent call last):
+    ...
+    RuntimeError: Inner classes must be nested in another class.
+
+    All inner instances should refer to a single outer instance:
+    >>> o = Outer()
+    >>> o.new_inner().outer == o == o.new_inner().outer
+    True
+
+    All inner instances should be of the same class ...
+    >>> o.new_inner().__class__ == o.new_inner().__class__
+    True
+
+    ... but that class isn't the inner class ...
+    >>> o.new_inner().__class__ != Outer.Inner
+    True
+
+    ... but a subclass of the inner class.
+    >>> isinstance( o.new_inner(), Outer.Inner )
+    True
+
+    Static and class methods, e.g. should work, too
+
+    >>> o.Inner.new_inner().outer == o
+    True
+    """
+
+    def __init__( self, inner_class ):
+        super( InnerClass, self ).__init__( )
+        self.inner_class = inner_class
+
+    # noinspection PyUnusedLocal
+    def __get__( self, instance, owner ):
+        # No need to wrap a static reference, i.e one that is made via 'Outer.' rather than 'self.'
+        if instance is None:
+            return self.inner_class
+        else:
+            return self._bind( instance )
+
+    @sync_memoize
+    def _bind( self, _outer ):
+        class BoundInner( self.inner_class ):
+            outer = _outer
+
+            def __repr__( self ):
+                return "%s bound to %s" % (super( BoundInner, self ).__repr__( ), repr( _outer ))
+
+        BoundInner.__name__ = self.inner_class.__name__
+        BoundInner.__module__ = self.inner_class.__module__
+        return BoundInner
+
+    def __call__( *args, **kwargs ):
+        raise RuntimeError( "Inner classes must be nested in another class." )
diff --git a/src/bd2k/util/processes.py b/src/bd2k/util/processes.py
new file mode 100644
index 0000000..a80732a
--- /dev/null
+++ b/src/bd2k/util/processes.py
@@ -0,0 +1,35 @@
+import os
+
+
+def which( name, path=None ):
+    """
+    Look for an executable file of the given name in the given list of directories,
+    or the directories listed in the PATH variable of the current environment. Roughly the
+    equivalent of the `which` program. Does not work on Windows.
+
+    :type name: str
+    :param name: the name of the program
+
+    :type path: Iterable
+    :param path: the directory paths to consider or None if the directories referenced in the
+    PATH environment variable should be used instead
+
+    :returns: an iterator yielding the full path to every occurrence of an executable file of the
+    given name in a directory on the given path or the PATH environment variable if no path was
+    passed
+
+    >>> next( which('ls') )
+    '/bin/ls'
+    >>> list( which('asdalskhvxjvkjhsdasdnbmfiewwewe') )
+    []
+    >>> list( which('ls', path=()) )
+    []
+    """
+    if path is None:
+        path = os.environ.get( 'PATH' )
+        if path is None: return
+        path = path.split( os.pathsep )
+    for bin_dir in path:
+        executable_path = os.path.join( bin_dir, name )
+        if os.access( executable_path, os.X_OK ):
+            yield executable_path
diff --git a/src/bd2k/util/retry.py b/src/bd2k/util/retry.py
new file mode 100644
index 0000000..031b338
--- /dev/null
+++ b/src/bd2k/util/retry.py
@@ -0,0 +1,138 @@
+from __future__ import absolute_import
+
+import time
+import urllib2
+from contextlib import contextmanager
+
+import logging
+
+log = logging.getLogger( __name__ )
+
+
+# noinspection PyUnusedLocal
+def never( exception ):
+    return False
+
+
+def retry( delays=(0, 1, 1, 4, 16, 64), timeout=300, predicate=never ):
+    """
+    Retry an operation while the failure matches a given predicate and until a given timeout
+    expires, waiting a given amount of time in between attempts. This function is a generator
+    that yields contextmanagers. See doctests below for example usage.
+
+    :param Iterable[float] delays: an iterable yielding the time in seconds to wait before each
+           retried attempt, the last element of the iterable will be repeated.
+
+    :param float timeout: an overall timeout that should not be exceeded for all attempts together.
+           This is a best-effort mechanism only and it won't abort an ongoing attempt, even if the
+           timeout expires during that attempt.
+
+    :param Callable[[Exception],bool] predicate: a unary callable returning True if another
+           attempt should be made to recover from the given exception. The default value for this
+           parameter will prevent any retries!
+
+    :return: a generator yielding context managers, one per attempt
+    :rtype: Iterator
+
+    Retry for a limited amount of time:
+
+    >>> true = lambda _:True
+    >>> false = lambda _:False
+    >>> i = 0
+    >>> for attempt in retry( delays=[0], timeout=.1, predicate=true ):
+    ...     with attempt:
+    ...         i += 1
+    ...         raise RuntimeError('foo')
+    Traceback (most recent call last):
+    ...
+    RuntimeError: foo
+    >>> i > 1
+    True
+
+    If timeout is 0, do exactly one attempt:
+
+    >>> i = 0
+    >>> for attempt in retry( timeout=0 ):
+    ...     with attempt:
+    ...         i += 1
+    ...         raise RuntimeError( 'foo' )
+    Traceback (most recent call last):
+    ...
+    RuntimeError: foo
+    >>> i
+    1
+
+    Don't retry on success:
+
+    >>> i = 0
+    >>> for attempt in retry( delays=[0], timeout=.1, predicate=true ):
+    ...     with attempt:
+    ...         i += 1
+    >>> i
+    1
+
+    Don't retry unless the predicate returns True:
+
+    >>> i = 0
+    >>> for attempt in retry( delays=[0], timeout=.1, predicate=false):
+    ...     with attempt:
+    ...         i += 1
+    ...         raise RuntimeError( 'foo' )
+    Traceback (most recent call last):
+    ...
+    RuntimeError: foo
+    >>> i
+    1
+    """
+    if timeout > 0:
+        go = [ None ]
+
+        @contextmanager
+        def repeated_attempt( delay ):
+            try:
+                yield
+            except Exception as e:
+                if time.time( ) + delay < expiration and predicate( e ):
+                    log.info( 'Got %s, trying again in %is.', e, delay )
+                    time.sleep( delay )
+                else:
+                    raise
+            else:
+                go.pop( )
+
+        delays = iter( delays )
+        expiration = time.time( ) + timeout
+        delay = next( delays )
+        while go:
+            yield repeated_attempt( delay )
+            delay = next( delays, delay )
+    else:
+        @contextmanager
+        def single_attempt( ):
+            yield
+
+        yield single_attempt( )
+
+
+default_delays = (0, 1, 1, 4, 16, 64)
+default_timeout = 300
+
+
+def retryable_http_error( e ):
+    return isinstance( e, urllib2.HTTPError ) and e.code in ('503', '408', '500')
+
+
+def retry_http( delays=default_delays, timeout=default_timeout, predicate=retryable_http_error ):
+    """
+    >>> i = 0
+    >>> for attempt in retry_http(timeout=5):
+    ...     with attempt:
+    ...         i += 1
+    ...         raise urllib2.HTTPError('http://www.test.com', '408', 'some message', {}, None)
+    Traceback (most recent call last):
+    ...
+    HTTPError: HTTP Error 408: some message
+    >>> i > 1
+    True
+    """
+    return retry( delays=delays, timeout=timeout, predicate=predicate )
diff --git a/src/bd2k/util/shell.py b/src/bd2k/util/shell.py
new file mode 100644
index 0000000..562b677
--- /dev/null
+++ b/src/bd2k/util/shell.py
@@ -0,0 +1,28 @@
+import re
+
+
+def quote(s, level=1):
+    for i in xrange( 0, level ):
+        s = _quote( s )
+    return s
+
+
+_find_unsafe = re.compile( r'[^\w@%+=:,./-]' ).search
+
+
+def _quote(s):
+    """
+    Return a shell-escaped version of the string *s*.
+
+    Stolen from Python 3's shlex module
+    """
+    if not s:
+        return "''"
+    if _find_unsafe( s ) is None:
+        return s
+
+    # use single quotes, and put single quotes into double quotes
+    # the string $'b is then quoted as '$'"'"'b'
+    return "'" + s.replace( "'", "'\"'\"'" ) + "'"
+
+
diff --git a/src/bd2k/util/strings.py b/src/bd2k/util/strings.py
new file mode 100644
index 0000000..563004f
--- /dev/null
+++ b/src/bd2k/util/strings.py
@@ -0,0 +1,129 @@
+# coding=utf-8
+
+import inspect
+
+
+def to_english( iterable, separator=", ", conjunction=' and ', empty='empty',
+                wrapper=None, pair_conjunction=None):
+    """
+    Convert list to a string containing an enumeration in plain English.
+
+    :param iterable: an iterable of strings or objects that can be cast to a string
+
+    :param separator: the text to insert between elements
+
+    :param conjunction: the text used to connect the final element
+
+    :param empty: the text to be used to represent an empty iterable
+
+    :param wrapper: the text to surround the elements
+
+    :param pair_conjunction: the conjunction to use between elements if there are exactly two of
+                             them, defaults to conjunction
+
+    >>> to_english( [], empty='nada' )
+    'nada'
+    >>> to_english( [ 1 ] )
+    '1'
+    >>> to_english( [ 1, 2 ], conjunction=' or ' )
+    '1 or 2'
+    >>> to_english( [ 1, 2, 3 ], conjunction=' or ')
+    '1, 2 or 3'
+    >>> to_english( [ 1, 2, 3 ], separator='; ', conjunction=' or ')
+    '1; 2 or 3'
+    >>> to_english( [ 1, 2, 3 ], conjunction=', and ', pair_conjunction=' and ' )
+    '1, 2, and 3'
+    >>> to_english( [ 1, 2 ], conjunction=', and ', pair_conjunction=' and ' )
+    '1 and 2'
+    >>> to_english( [ 1 ], conjunction=', and ', pair_conjunction=' and ' )
+    '1'
+    """
+    i = iter( iterable )
+    try:
+        x = i.next( )
+    except StopIteration:
+        return empty
+    r = [ ]
+    while True:
+        x = str( x )
+        if wrapper is not None:
+            x = wrapper + x + wrapper
+        try:
+            n = i.next( )
+        except StopIteration:
+            if len(r) > 2:
+                r.append( conjunction )
+            elif len(r) > 0:
+                r.append( conjunction if pair_conjunction is None else pair_conjunction )
+            r.append( x )
+            break
+        else:
+            if r: r.append( separator )
+            r.append( x )
+            x = n
+    return ''.join( r )
+
+
+def interpolate( template, skip_frames=0, **kwargs ):
+    """
+    Interpolate {…} placeholders in the given template string with the given values or the local
+    variables in the calling scope. The syntax of the format string is the same as for the
+    built-in string format function. Explicitly passed keyword arguments take precedence over
+    local variables which take precedence over global variables.
+
+    Unlike with Python scoping rules, only the variables in a single frame are examined.
+
+    Example usage:
+
+    >>> x = 1
+    >>> interpolate( "{x}" )
+    '1'
+    >>> interpolate( "{x}", x=2 )
+    '2'
+    >>> interpolate( "{x} {y}", y=2 )
+    '1 2'
+
+    Use
+
+    from bd2k.util.strings import interpolate as fmt
+
+    to import this function under a shortened alias.
+    """
+    return __interpolate( template, skip_frames, kwargs )
+
+
+def interpolate_dict( template, dictionary, skip_frames=0 ):
+    """
+    Equivalent to
+
+    interpolate( template, skip_frames, **dictionary )
+
+    Example usage:
+
+    >>> x = 1
+    >>> interpolate_dict( "{x}", {} )
+    '1'
+    >>> interpolate_dict( "{x}", dict(x=2) )
+    '2'
+    >>> interpolate_dict( "{x} {y}", dict(y=2) )
+    '1 2'
+    """
+    return __interpolate( template, skip_frames, dictionary )
+
+
+# This is a separate function such that the depth to the client stack frame is the same for
+# interpolate() and interpolate_dict()
+
+def __interpolate( template, skip_frames, dictionary ):
+    frame = inspect.currentframe( )
+    for i in xrange( skip_frames + 2 ):
+        prev_frame = frame
+        frame = frame.f_back
+        del prev_frame
+    try:
+        env = frame.f_globals.copy( )
+        env.update( frame.f_locals )
+        env.update( dictionary )
+    finally:
+        del frame
+    return template.format( **env )
diff --git a/src/bd2k/util/test/__init__.py b/src/bd2k/util/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/bd2k/util/test/test_d32.py b/src/bd2k/util/test/test_d32.py
new file mode 100644
index 0000000..aaf9711
--- /dev/null
+++ b/src/bd2k/util/test/test_d32.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2014 Dominic Tarr
+# Copyright (c) 2015 Hannes Schmidt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+# and associated documentation files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or
+# substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Inspired by JavaScript code found at https://github.com/dominictarr/d64
+
+from __future__ import absolute_import
+from unittest import TestCase
+from bd2k.util.d32 import standard as d32
+import os
+
+
+class TestD32( TestCase ):
+    def test( self ):
+        l = [ os.urandom( i ) for i in xrange( 1000 ) ]
+        self.assertEqual( map( d32.decode, sorted( map( d32.encode, l ) ) ), sorted( l ) )
diff --git a/src/bd2k/util/test/test_d64.py b/src/bd2k/util/test/test_d64.py
new file mode 100644
index 0000000..efdbcc1
--- /dev/null
+++ b/src/bd2k/util/test/test_d64.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2014 Dominic Tarr
+# Copyright (c) 2015 Hannes Schmidt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+# and associated documentation files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or
+# substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Ported from JS found at https://github.com/dominictarr/d64
+
+from __future__ import absolute_import
+from unittest import TestCase
+from bd2k.util.d64 import standard as d64
+import os
+
+
+class TestD64( TestCase ):
+    def test( self ):
+        l = [ os.urandom( i ) for i in xrange( 1000 ) ]
+        self.assertEqual( map( d64.decode, sorted( map( d64.encode, l ) ) ), sorted( l ) )
diff --git a/src/bd2k/util/test/test_files.py b/src/bd2k/util/test/test_files.py
new file mode 100644
index 0000000..c693c16
--- /dev/null
+++ b/src/bd2k/util/test/test_files.py
@@ -0,0 +1,32 @@
+from unittest import TestCase
+
+from mock import MagicMock, call
+
+
+class TestFiles( TestCase ):
+    if False:
+        from bd2k.util.files import gread, gwrite
+        # See comment in module under test
+        def test_gread( self ):
+            for n in range( 0, 4 ):
+                f = MagicMock( )
+                # The mock file contains "12". Each read() invocation shall return one byte from that,
+                # followed by the empty string for EOF.
+                f.read.side_effect = [ '1', '2', '' ]
+                # Read n bytes greedily
+                # noinspection PyTypeChecker
+                self.assertEqual( self.gread( f, n ), "12"[ :n ] )
+                # First call to read() should request n bytes and then one less on each subsequent call.
+                self.assertEqual( f.mock_calls, [ call.read( i ) for i in range( n, 0, -1 ) ] )
+
+        def test_gwrite( self ):
+            for n in range( 0, 3 ):
+                f = MagicMock( )
+                # Each write invocation shall write a single byte.
+                f.write.side_effect = [ 1 ] * n
+                s = "12"[ :n ]
+                # noinspection PyTypeChecker
+                self.gwrite( f, s )
+                # The first call to write() should be passed the entire string, minus one byte off
+                # the front for each subsequent call.
+                self.assertEqual( f.mock_calls, [ call.write( s[ i: ] ) for i in range( 0, n ) ] )
diff --git a/src/bd2k/util/test/test_panic.py b/src/bd2k/util/test/test_panic.py
new file mode 100644
index 0000000..1737875
--- /dev/null
+++ b/src/bd2k/util/test/test_panic.py
@@ -0,0 +1,80 @@
+import inspect
+import logging
+import unittest
+import sys
+
+from bd2k.util.exceptions import panic
+
+log = logging.getLogger( __name__ )
+logging.basicConfig( )
+
+
+class TestPanic( unittest.TestCase ):
+    def test_panic_by_hand( self ):
+        try:
+            self.try_and_panic_by_hand( )
+        except:
+            self.__assert_raised_exception_is_primary( )
+
+    def test_panic( self ):
+        try:
+            self.try_and_panic( )
+        except:
+            self.__assert_raised_exception_is_primary( )
+
+    def test_panic_with_secondary( self ):
+        try:
+            self.try_and_panic_with_secondary( )
+        except:
+            self.__assert_raised_exception_is_primary( )
+
+    def test_nested_panic( self ):
+        try:
+            self.try_and_nested_panic_with_secondary( )
+        except:
+            self.__assert_raised_exception_is_primary( )
+
+    def try_and_panic_by_hand( self ):
+        try:
+            self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1
+            raise ValueError( "primary" )
+        except Exception:
+            exc_type, exc_value, exc_traceback = sys.exc_info( )
+            try:
+                raise RuntimeError( "secondary" )
+            except Exception:
+                pass
+            raise exc_type, exc_value, exc_traceback
+
+    def try_and_panic( self ):
+        try:
+            self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1
+            raise ValueError( "primary" )
+        except:
+            with panic( log ):
+                pass
+
+    def try_and_panic_with_secondary( self ):
+        try:
+            self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1
+            raise ValueError( "primary" )
+        except:
+            with panic( log ):
+                raise RuntimeError( "secondary" )
+
+    def try_and_nested_panic_with_secondary( self ):
+        try:
+            self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1
+            raise ValueError( "primary" )
+        except:
+            with panic( log ):
+                with panic( log ):
+                    raise RuntimeError( "secondary" )
+
+    def __assert_raised_exception_is_primary( self ):
+        exc_type, exc_value, exc_traceback = sys.exc_info( )
+        self.assertEquals( exc_type, ValueError )
+        self.assertEquals( exc_value.message, "primary" )
+        while exc_traceback.tb_next is not None:
+            exc_traceback = exc_traceback.tb_next
+        self.assertEquals( exc_traceback.tb_lineno, self.line_of_primary_exc )
diff --git a/src/bd2k/util/test/test_strings.py b/src/bd2k/util/test/test_strings.py
new file mode 100644
index 0000000..acc4d10
--- /dev/null
+++ b/src/bd2k/util/test/test_strings.py
@@ -0,0 +1,13 @@
+import unittest
+
+from bd2k.util.strings import interpolate
+from bd2k.util.strings import to_english
+
+foo = 4
+bar = 1
+
+
+class TestStrings( unittest.TestCase ):
+    def test_interpolate( self ):
+        bar = 2  # should override the global foo
+        self.assertEquals( interpolate( "{foo}{bar}" ), "42" )
diff --git a/src/bd2k/util/threading.py b/src/bd2k/util/threading.py
new file mode 100644
index 0000000..da69067
--- /dev/null
+++ b/src/bd2k/util/threading.py
@@ -0,0 +1,81 @@
+from __future__ import absolute_import
+import sys
+import threading
+
+
+class BoundedEmptySemaphore( threading._BoundedSemaphore ):
+    """
+    A bounded semaphore that is initially empty.
+    """
+
+    def __init__( self, value=1, verbose=None ):
+        super( BoundedEmptySemaphore, self ).__init__( value, verbose )
+        for i in xrange( value ):
+            assert self.acquire( blocking=False )
+
+
+class ExceptionalThread( threading.Thread ):
+    """
+    A thread whose join() method re-raises exceptions raised during run(). While join() is
+    idempotent, the exception is only re-raised during the first invocation of join() that
+    successfully joined the thread. If join() times out, no exception will be re-raised even
+    though an exception might already have occurred in run().
+
+    When subclassing this thread, override tryRun() instead of run().
+
+    >>> def f():
+    ...     assert 0
+    >>> t = ExceptionalThread(target=f)
+    >>> t.start()
+    >>> t.join()
+    Traceback (most recent call last):
+    ...
+    AssertionError
+
+    >>> class MyThread(ExceptionalThread):
+    ...     def tryRun( self ):
+    ...         assert 0
+    >>> t = MyThread()
+    >>> t.start()
+    >>> t.join()
+    Traceback (most recent call last):
+    ...
+    AssertionError
+
+    """
+
+    exc_info = None
+
+    def run( self ):
+        try:
+            self.tryRun( )
+        except:
+            self.exc_info = sys.exc_info( )
+            raise
+
+    def tryRun( self ):
+        super( ExceptionalThread, self ).run( )
+
+    def join( self, *args, **kwargs ):
+        super( ExceptionalThread, self ).join( *args, **kwargs )
+        if not self.is_alive( ) and self.exc_info is not None:
+            type, value, traceback = self.exc_info
+            self.exc_info = None
+            raise type, value, traceback
+
+
+# noinspection PyPep8Naming
+class defaultlocal( threading.local ):
+    """
+    Thread local storage with default values for each field in each thread
+
+    >>> l = defaultlocal( foo=42 )
+    >>> def f(): print l.foo
+    >>> t = threading.Thread(target=f)
+    >>> t.start() ; t.join()
+    42
+    """
+
+    def __init__( self, **kwargs ):
+        super( defaultlocal, self ).__init__( )
+        self.__dict__.update( kwargs )
diff --git a/src/bd2k/util/throttle.py b/src/bd2k/util/throttle.py
new file mode 100644
index 0000000..a69165f
--- /dev/null
+++ b/src/bd2k/util/throttle.py
@@ -0,0 +1,203 @@
+from __future__ import absolute_import
+
+import time
+import threading
+
+from bd2k.util.threading import BoundedEmptySemaphore
+
+
class GlobalThrottle:
    """
    A thread-safe rate limiter that throttles all threads globally. This should be used to
    regulate access to a global resource. It can be used as a function/method decorator or as a
    simple object, using the throttle() method. The token generation starts with the first call
    to throttle() or the decorated function. Each subsequent call to throttle() will then acquire
    a token, possibly having to wait until one becomes available. The number of unused tokens
    will not exceed a limit given at construction time. This is a very basic mechanism to
    prevent the resource from becoming swamped after longer pauses.
    """

    def __init__( self, min_interval, max_unused ):
        # Seconds between token generations, i.e. the minimum global interval
        # between two successful throttle() calls once the bucket is drained.
        self.min_interval = min_interval
        # Token bucket; NOTE(review): a BoundedEmptySemaphore presumably starts with
        # zero tokens and caps out at max_unused — confirm against bd2k.util.threading.
        self.semaphore = BoundedEmptySemaphore( max_unused )
        self.thread_start_lock = threading.Lock( )
        self.thread_started = False
        # Daemon thread so the token generator never keeps the process alive.
        self.thread = threading.Thread( target=self.generator )
        self.thread.daemon = True

    def generator( self ):
        # Runs forever on the daemon thread, minting one token per interval.
        while True:
            try:
                self.semaphore.release( )
            except ValueError:
                # Bucket already holds max_unused tokens; drop this one.
                pass
            time.sleep( self.min_interval )

    def throttle( self, wait=True ):
        """
        If the wait parameter is True, this method returns True after suspending the current
        thread as necessary to ensure that no less than the configured minimum interval passed
        since the most recent time an invocation of this method returned True in any thread.

        If the wait parameter is False, this method immediately returns True if at least the
        configured minimum interval has passed since the most recent time this method returned
        True in any thread, or False otherwise.
        """
        # Lazily start the generator thread exactly once, on the first throttle() call.
        # I think there is a race in Thread.start(), hence the lock
        with self.thread_start_lock:
            if not self.thread_started:
                self.thread.start( )
                self.thread_started = True
        # Consume one token, blocking until one is available iff wait is True.
        return self.semaphore.acquire( blocking=wait )

    def __call__( self, function ):
        # Decorator support: each call to the decorated function first acquires a token.
        # NOTE(review): wrapper does not preserve function.__name__/__doc__ (no functools.wraps).
        def wrapper( *args, **kwargs ):
            self.throttle( )
            return function( *args, **kwargs )

        return wrapper
+
+
class LocalThrottle:
    """
    A thread-safe rate limiter that throttles each thread independently. Can be used as a
    function or method decorator or as a simple object, via its .throttle() method.

    The use as a decorator is deprecated in favor of throttle().
    """

    def __init__( self, min_interval ):
        """
        Initialize this local throttle.

        :param min_interval: The minimum interval in seconds between invocations of the throttle
        method or, if this throttle is used as a decorator, invocations of the decorated method.
        """
        self.min_interval = min_interval
        # Per-thread state. Attributes set on a threading.local instance are only
        # visible to the thread that set them, so last_invocation is NOT pre-initialized
        # here: doing so would only cover the constructing thread, and the first
        # throttle() call from any other thread would raise AttributeError. Instead,
        # throttle() reads the attribute with getattr() and a default of None.
        self.per_thread = threading.local( )

    def throttle( self, wait=True ):
        """
        If the wait parameter is True, this method returns True after suspending the current
        thread as necessary to ensure that no less than the configured minimum interval has
        passed since the last invocation of this method in the current thread returned True.

        If the wait parameter is False, this method immediately returns True (if at least the
        configured minimum interval has passed since the last time this method returned True in
        the current thread) or False otherwise.
        """
        now = time.time( )
        # Default of None covers threads that have never called throttle() before.
        last_invocation = getattr( self.per_thread, 'last_invocation', None )
        if last_invocation is not None:
            interval = now - last_invocation
            if interval < self.min_interval:
                if wait:
                    remainder = self.min_interval - interval
                    time.sleep( remainder )
                else:
                    return False
        self.per_thread.last_invocation = now
        return True

    def __call__( self, function ):
        # Deprecated decorator support; see class docstring.
        def wrapper( *args, **kwargs ):
            self.throttle( )
            return function( *args, **kwargs )

        return wrapper
+
+
class throttle( object ):
    """
    A context manager for ensuring that the execution of its body takes at least a given amount
    of time, sleeping if necessary. It is a simpler version of LocalThrottle if used as a
    decorator.

    Ensures that body takes at least the given amount of time.

    >>> start = time.time()
    >>> with throttle(1):
    ...     pass
    >>> 1 <= time.time() - start <= 1.1
    True

    Ditto when used as a decorator.

    >>> @throttle(1)
    ... def f():
    ...     pass
    >>> start = time.time()
    >>> f()
    >>> 1 <= time.time() - start <= 1.1
    True

    If the body takes longer by itself, don't throttle.

    >>> start = time.time()
    >>> with throttle(1):
    ...     time.sleep(2)
    >>> 2 <= time.time() - start <= 2.1
    True

    Ditto when used as a decorator.

    >>> @throttle(1)
    ... def f():
    ...     time.sleep(2)
    >>> start = time.time()
    >>> f()
    >>> 2 <= time.time() - start <= 2.1
    True

    If an exception occurs, don't throttle.

    >>> start = time.time()
    >>> try:
    ...     with throttle(1):
    ...         raise ValueError('foo')
    ... except ValueError:
    ...     end = time.time()
    ...     raise
    Traceback (most recent call last):
    ...
    ValueError: foo
    >>> 0 <= end - start <= 0.1
    True

    Ditto when used as a decorator.

    >>> @throttle(1)
    ... def f():
    ...     raise ValueError('foo')
    >>> start = time.time()
    >>> try:
    ...     f()
    ... except ValueError:
    ...     end = time.time()
    ...     raise
    Traceback (most recent call last):
    ...
    ValueError: foo
    >>> 0 <= end - start <= 0.1
    True
    """

    def __init__( self, min_interval ):
        """
        :param min_interval: the minimum time, in seconds, that the body of the with statement
               (or an invocation of the decorated function) should take.
        """
        self.min_interval = min_interval

    def __enter__( self ):
        self.start = time.time( )
        # Return self so that 'with throttle(n) as t:' binds the throttle instance,
        # per the usual context manager convention.
        return self

    def __exit__( self, exc_type, exc_val, exc_tb ):
        # Only pad the elapsed time on the success path; if the body raised,
        # propagate immediately without sleeping.
        if exc_type is None:
            duration = time.time( ) - self.start
            remainder = self.min_interval - duration
            if remainder > 0:
                time.sleep( remainder )
        # Implicitly returns None: exceptions from the body are never suppressed.

    def __call__( self, function ):
        def wrapper( *args, **kwargs ):
            # Use a fresh throttle per invocation so that concurrent calls to the
            # decorated function don't race on a shared self.start attribute.
            with throttle( self.min_interval ):
                return function( *args, **kwargs )

        return wrapper
diff --git a/src/bd2k/util/xml/__init__.py b/src/bd2k/util/xml/__init__.py
new file mode 100644
index 0000000..f34c55b
--- /dev/null
+++ b/src/bd2k/util/xml/__init__.py
@@ -0,0 +1 @@
+__author__ = 'hannes'
diff --git a/src/bd2k/util/xml/builder.py b/src/bd2k/util/xml/builder.py
new file mode 100644
index 0000000..a5ad6cb
--- /dev/null
+++ b/src/bd2k/util/xml/builder.py
@@ -0,0 +1,236 @@
+# This is a port from lxml.builder.E which itself was inspired by an idea by the creator of
+# ElementTree (http://effbot.org/zone/element-builder.htm). Support for namespaces was removed.
+#
+# -- Hannes
+
+#
+# Element generator factory by Fredrik Lundh.
+#
+# Source:
+#    http://online.effbot.org/2006_11_01_archive.htm#et-builder
+#    http://effbot.python-hosting.com/file/stuff/sandbox/elementlib/builder.py
+#
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+#
+# Copyright (c) 1999-2004 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+from __future__ import absolute_import
+
+"""
+The ``E`` Element factory for generating XML documents.
+"""
+
+import xml.etree.ElementTree as ET
+
# Compatibility shims so this module runs unmodified on old Python 2 releases
# as well as Python 3.

try:
    from functools import partial
except ImportError:
    # fake it for pre-2.5 releases
    def partial(func, tag):
        return lambda *args, **kwargs: func(tag, *args, **kwargs)

try:
    callable
except NameError:
    # Python 3
    # NOTE(review): callable() was removed in 3.0 and restored in 3.2 — this shim
    # only matters on 3.0/3.1.
    def callable(f):
        return hasattr(f, '__call__')

try:
    basestring
except NameError:
    # Python 3 has no basestring; str covers all text there.
    basestring = str

try:
    unicode
except NameError:
    # Python 3 has no unicode type; str is already unicode text.
    unicode = str
+
+
class ElementMaker(object):
    """Element generator factory.

    Unlike the ordinary Element factory, the E factory allows you to pass in
    more than just a tag and some optional attributes; you can also pass in
    text and other elements.  The text is added as either text or tail
    attributes, and elements are inserted at the right spot.  Some small
    examples::

        >>> import xml.etree.ElementTree as ET

        >>> ET.tostring(E("tag"))
        '<tag />'
        >>> ET.tostring(E("tag", "text"))
        '<tag>text</tag>'
        >>> ET.tostring(E("tag", "text", key="value"))
        '<tag key="value">text</tag>'
        >>> ET.tostring(E("tag", E("subtag", "text"), "tail"))
        '<tag><subtag>text</subtag>tail</tag>'

    For simple tags, the factory also allows you to write ``E.tag(...)`` instead
    of ``E('tag', ...)``::

        >>> ET.tostring(E.tag())
        '<tag />'
        >>> ET.tostring(E.tag("text"))
        '<tag>text</tag>'
        >>> ET.tostring(E.tag(E.subtag("text"), "tail"))
        '<tag><subtag>text</subtag>tail</tag>'

    Here's a somewhat larger example; this shows how to generate HTML
    documents, using a mix of prepared factory functions for inline elements,
    nested ``E.tag`` calls, and embedded XHTML fragments::

        # some common inline elements
        A = E.a
        I = E.i
        B = E.b

        def CLASS(v):
            # helper function, 'class' is a reserved word
            return {'class': v}

        page = (
            E.html(
                E.head(
                    E.title("This is a sample document")
                ),
                E.body(
                    E.h1("Hello!", CLASS("title")),
                    E.p("This is a paragraph with ", B("bold"), " text in it!"),
                    E.p("This is another paragraph, with a ",
                        A("link", href="http://www.python.org"), "."),
                    E.p("Here are some reserved characters: <spam&egg>."),
                    ET.XML("<p>And finally, here is an embedded XHTML fragment.</p>"),
                )
            )
        )

        print ET.tostring(page)

    Here's a prettyprinted version of the output from the above script::

        <html>
          <head>
            <title>This is a sample document</title>
          </head>
          <body>
            <h1 class="title">Hello!</h1>
            <p>This is a paragraph with <b>bold</b> text in it!</p>
            <p>This is another paragraph, with <a href="http://www.python.org">link</a>.</p>
            <p>Here are some reserved characters: <spam&egg>.</p>
            <p>And finally, here is an embedded XHTML fragment.</p>
          </body>
        </html>
    """

    def __init__(self, typemap=None,
                 namespace=None, makeelement=None):
        # Pre-compute the Clark-notation prefix ('{uri}') once; __call__ prepends
        # it to any tag that isn't already namespaced.
        if namespace is not None:
            self._namespace = '{' + namespace + '}'
        else:
            self._namespace = None

        # Allow callers to substitute their own element constructor
        # (e.g. a subclass factory); default to the standard ET.Element.
        if makeelement is not None:
            assert callable(makeelement)
            self._makeelement = makeelement
        else:
            self._makeelement = ET.Element

        # initialize type map for this element factory
        # The typemap maps a child's type to a handler(elem, item) that merges the
        # item into elem. Copy the caller's map so defaults added below don't leak
        # back into the caller's dict.

        if typemap:
            typemap = typemap.copy()
        else:
            typemap = {}

        def add_text(elem, item):
            # Append text after the last child if there is one (tail text),
            # otherwise as the element's own leading text.
            try:
                elem[-1].tail = (elem[-1].tail or "") + item
            except IndexError:
                elem.text = (elem.text or "") + item

        def add_cdata(elem, cdata):
            # Currently unused (see the commented-out ET.CDATA registration below);
            # kept from the lxml.builder port.
            if elem.text:
                raise ValueError("Can't add a CDATA section. Element already has some text: %r" % elem.text)
            elem.text = cdata

        # Only install defaults the caller hasn't overridden.
        if str not in typemap:
            typemap[str] = add_text
        if unicode not in typemap:
            typemap[unicode] = add_text
        # if ET.CDATA not in typemap:
        #     typemap[ET.CDATA] = add_cdata

        def add_dict(elem, item):
            # Merge a dict of attributes; non-string values are converted via the
            # typemap handler for their type (called with elem=None).
            attrib = elem.attrib
            for k, v in item.items():
                if isinstance(v, basestring):
                    attrib[k] = v
                else:
                    attrib[k] = typemap[type(v)](None, v)
        if dict not in typemap:
            typemap[dict] = add_dict

        self._typemap = typemap

    def __call__(self, tag, *children, **attrib):
        # Build an element named 'tag', merging each child per its typemap handler.
        get = self._typemap.get

        if self._namespace is not None and tag[0] != '{':
            tag = self._namespace + tag
        elem = self._makeelement(tag)
        if attrib:
            get(dict)(elem, attrib)

        for item in children:
            # Callables are invoked lazily so e.g. E.subtag can be passed uncalled.
            if callable(item):
                item = item()
            t = get(type(item))
            if t is None:
                # Elements are appended directly rather than via the typemap.
                if ET.iselement(item):
                    elem.append(item)
                    continue
                for basetype in type(item).__mro__:
                    # See if the typemap knows of any of this type's bases.
                    t = get(basetype)
                    if t is not None:
                        break
                else:
                    raise TypeError("bad argument type: %s(%r)" %
                                    (type(item).__name__, item))
            # If the handler returned a value, recursively merge that too
            # (e.g. a handler converting an object into text).
            v = t(elem, item)
            if v:
                get(type(v))(elem, v)

        return elem

    def __getattr__(self, tag):
        # E.tag(...) sugar: attribute access yields a factory bound to that tag name.
        return partial(self, tag)

# create factory object
# Default, namespace-less factory instance used as 'from ...builder import E'.
E = ElementMaker()
diff --git a/src/bd2k_python_lib.egg-info/PKG-INFO b/src/bd2k_python_lib.egg-info/PKG-INFO
new file mode 100644
index 0000000..5786e6e
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: bd2k-python-lib
+Version: 1.14a1.dev37
+Summary: The BD2K Python module kitchen sink
+Home-page: https://github.com/BD2KGenomics/bd2k-python-lib
+Author: Hannes Schmidt
+Author-email: hannes at ucsc.edu
+License: UNKNOWN
+Description: UNKNOWN
+Platform: UNKNOWN
diff --git a/src/bd2k_python_lib.egg-info/SOURCES.txt b/src/bd2k_python_lib.egg-info/SOURCES.txt
new file mode 100644
index 0000000..6bfe1f1
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/SOURCES.txt
@@ -0,0 +1,41 @@
+setup.cfg
+setup.py
+src/bd2k/__init__.py
+src/bd2k/util/__init__.py
+src/bd2k/util/collections.py
+src/bd2k/util/d32.py
+src/bd2k/util/d64.py
+src/bd2k/util/exceptions.py
+src/bd2k/util/expando.py
+src/bd2k/util/files.py
+src/bd2k/util/fnmatch.py
+src/bd2k/util/hashes.py
+src/bd2k/util/humanize.py
+src/bd2k/util/iterables.py
+src/bd2k/util/lockfile.py
+src/bd2k/util/logging.py
+src/bd2k/util/objects.py
+src/bd2k/util/processes.py
+src/bd2k/util/retry.py
+src/bd2k/util/shell.py
+src/bd2k/util/strings.py
+src/bd2k/util/threading.py
+src/bd2k/util/throttle.py
+src/bd2k/util/ec2/__init__.py
+src/bd2k/util/ec2/credentials.py
+src/bd2k/util/ec2/test/__init__.py
+src/bd2k/util/ec2/test/test_credentials.py
+src/bd2k/util/test/__init__.py
+src/bd2k/util/test/test_d32.py
+src/bd2k/util/test/test_d64.py
+src/bd2k/util/test/test_files.py
+src/bd2k/util/test/test_panic.py
+src/bd2k/util/test/test_strings.py
+src/bd2k/util/xml/__init__.py
+src/bd2k/util/xml/builder.py
+src/bd2k_python_lib.egg-info/PKG-INFO
+src/bd2k_python_lib.egg-info/SOURCES.txt
+src/bd2k_python_lib.egg-info/dependency_links.txt
+src/bd2k_python_lib.egg-info/namespace_packages.txt
+src/bd2k_python_lib.egg-info/pbr.json
+src/bd2k_python_lib.egg-info/top_level.txt
\ No newline at end of file
diff --git a/src/bd2k_python_lib.egg-info/dependency_links.txt b/src/bd2k_python_lib.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/src/bd2k_python_lib.egg-info/namespace_packages.txt b/src/bd2k_python_lib.egg-info/namespace_packages.txt
new file mode 100644
index 0000000..a3cbc43
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/namespace_packages.txt
@@ -0,0 +1 @@
+bd2k
diff --git a/src/bd2k_python_lib.egg-info/pbr.json b/src/bd2k_python_lib.egg-info/pbr.json
new file mode 100644
index 0000000..e47388c
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/pbr.json
@@ -0,0 +1 @@
+{"is_release": false, "git_version": "a662f3c"}
\ No newline at end of file
diff --git a/src/bd2k_python_lib.egg-info/top_level.txt b/src/bd2k_python_lib.egg-info/top_level.txt
new file mode 100644
index 0000000..a3cbc43
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/top_level.txt
@@ -0,0 +1 @@
+bd2k

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-bd2k.git



More information about the debian-med-commit mailing list