[med-svn] [python-bd2k] 01/02: New upstream version 1.14~alpha1.37
Steffen Möller
moeller at moszumanska.debian.org
Sat Jan 14 09:08:53 UTC 2017
This is an automated email from the git hooks/post-receive script.
moeller pushed a commit to branch master
in repository python-bd2k.
commit 2c0adee92cdcdeb23973889545337a0e586861dc
Author: Steffen Moeller <moeller at debian.org>
Date: Sat Jan 14 09:30:26 2017 +0100
New upstream version 1.14~alpha1.37
---
PKG-INFO | 10 +
setup.cfg | 9 +
setup.py | 51 +++++
src/bd2k/__init__.py | 1 +
src/bd2k/util/__init__.py | 227 ++++++++++++++++++++
src/bd2k/util/collections.py | 162 ++++++++++++++
src/bd2k/util/d32.py | 117 ++++++++++
src/bd2k/util/d64.py | 117 ++++++++++
src/bd2k/util/ec2/__init__.py | 0
src/bd2k/util/ec2/credentials.py | 155 ++++++++++++++
src/bd2k/util/ec2/test/__init__.py | 0
src/bd2k/util/ec2/test/test_credentials.py | 83 ++++++++
src/bd2k/util/exceptions.py | 81 +++++++
src/bd2k/util/expando.py | 117 ++++++++++
src/bd2k/util/files.py | 106 +++++++++
src/bd2k/util/fnmatch.py | 149 +++++++++++++
src/bd2k/util/hashes.py | 132 ++++++++++++
src/bd2k/util/humanize.py | 131 ++++++++++++
src/bd2k/util/iterables.py | 169 +++++++++++++++
src/bd2k/util/lockfile.py | 36 ++++
src/bd2k/util/logging.py | 29 +++
src/bd2k/util/objects.py | 217 +++++++++++++++++++
src/bd2k/util/processes.py | 35 +++
src/bd2k/util/retry.py | 138 ++++++++++++
src/bd2k/util/shell.py | 28 +++
src/bd2k/util/strings.py | 129 +++++++++++
src/bd2k/util/test/__init__.py | 0
src/bd2k/util/test/test_d32.py | 30 +++
src/bd2k/util/test/test_d64.py | 30 +++
src/bd2k/util/test/test_files.py | 32 +++
src/bd2k/util/test/test_panic.py | 80 +++++++
src/bd2k/util/test/test_strings.py | 13 ++
src/bd2k/util/threading.py | 81 +++++++
src/bd2k/util/throttle.py | 203 ++++++++++++++++++
src/bd2k/util/xml/__init__.py | 1 +
src/bd2k/util/xml/builder.py | 236 +++++++++++++++++++++
src/bd2k_python_lib.egg-info/PKG-INFO | 10 +
src/bd2k_python_lib.egg-info/SOURCES.txt | 41 ++++
src/bd2k_python_lib.egg-info/dependency_links.txt | 1 +
.../namespace_packages.txt | 1 +
src/bd2k_python_lib.egg-info/pbr.json | 1 +
src/bd2k_python_lib.egg-info/top_level.txt | 1 +
42 files changed, 3190 insertions(+)
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..5786e6e
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: bd2k-python-lib
+Version: 1.14a1.dev37
+Summary: The BD2K Python module kitchen sink
+Home-page: https://github.com/BD2KGenomics/bd2k-python-lib
+Author: Hannes Schmidt
+Author-email: hannes at ucsc.edu
+License: UNKNOWN
+Description: UNKNOWN
+Platform: UNKNOWN
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..c702222
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,9 @@
+[pytest]
+python_files = *.py
+addopts = --doctest-modules
+
+[egg_info]
+tag_build = .dev37
+tag_date = 0
+tag_svn_revision = 0
+
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..dde5441
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,51 @@
+import sys
+
+assert sys.version_info >= (2, 6)
+
+from setuptools import setup, find_packages
+
+kwargs = dict(
+ name="bd2k-python-lib",
+ version="1.14a1",
+
+ author='Hannes Schmidt',
+ author_email='hannes at ucsc.edu',
+ url='https://github.com/BD2KGenomics/bd2k-python-lib',
+ description='The BD2K Python module kitchen sink',
+
+ package_dir={ '': 'src' },
+ packages=find_packages( 'src' ),
+ install_requires=[ ],
+ tests_require=[
+ 'pytest==2.7.2',
+ 'mock==1.0.1',
+ 'lockfile==0.11.0',
+ 'boto==2.38.0' ],
+ namespace_packages=[ 'bd2k' ] )
+
+from setuptools.command.test import test as TestCommand
+
+
+class PyTest( TestCommand ):
+ user_options = [ ('pytest-args=', 'a', "Arguments to pass to py.test") ]
+
+ def initialize_options( self ):
+ TestCommand.initialize_options( self )
+ self.pytest_args = [ ]
+
+ def finalize_options( self ):
+ TestCommand.finalize_options( self )
+ self.test_args = [ ]
+ self.test_suite = True
+
+ def run_tests( self ):
+ import pytest
+ # Sanitize command line arguments to avoid confusing Toil code attempting to parse them
+ sys.argv[ 1: ] = [ ]
+ errno = pytest.main( self.pytest_args )
+ sys.exit( errno )
+
+
+kwargs[ 'cmdclass' ] = { 'test': PyTest }
+
+setup( **kwargs )
diff --git a/src/bd2k/__init__.py b/src/bd2k/__init__.py
new file mode 100644
index 0000000..1148131
--- /dev/null
+++ b/src/bd2k/__init__.py
@@ -0,0 +1 @@
+__import__( 'pkg_resources' ).declare_namespace( __name__ )
\ No newline at end of file
diff --git a/src/bd2k/util/__init__.py b/src/bd2k/util/__init__.py
new file mode 100644
index 0000000..9c240ae
--- /dev/null
+++ b/src/bd2k/util/__init__.py
@@ -0,0 +1,227 @@
+from __future__ import absolute_import
+
+import datetime
+import grp
+import pwd
+from functools import wraps
+
+from threading import Lock
+
+import re
+
+
+def uid_to_name( uid ):
+ return pwd.getpwuid( uid ).pw_name
+
+
+def gid_to_name( gid ):
+ return grp.getgrgid( gid ).gr_name
+
+
+def name_to_uid( name ):
+ return pwd.getpwnam( name ).pw_uid
+
+
+def name_to_gid( name ):
+ return grp.getgrnam( name ).gr_gid
+
+
+def memoize( f ):
+ """
+ A decorator that memoizes a function result based on its parameters. For example, this can be
+ used in place of lazy initialization. If the decorating function is invoked by multiple
+ threads, the decorated function may be called more than once with the same arguments.
+ """
+
+ # TODO: Recommend that f's arguments be immutable
+
+ memory = { }
+
+ @wraps( f )
+ def new_f( *args ):
+ try:
+ return memory[ args ]
+ except KeyError:
+ r = f( *args )
+ memory[ args ] = r
+ return r
+
+ return new_f
+
+
+def sync_memoize( f ):
+ """
+ Like memoize, but guarantees that decorated function is only called once, even when multiple
+ threads are calling the decorating function with multiple parameters.
+ """
+
+ # TODO: Think about an f that is recursive
+
+ memory = { }
+ lock = Lock( )
+
+ @wraps( f )
+ def new_f( *args ):
+ try:
+ return memory[ args ]
+ except KeyError:
+ # on cache misses, retry with lock held
+ with lock:
+ try:
+ return memory[ args ]
+ except KeyError:
+ r = f( *args )
+ memory[ args ] = r
+ return r
+
+ return new_f
+
+
+def properties( obj ):
+    """
+    Returns a dictionary with one entry per attribute of the given object. The key being the
+    attribute name and the value being the attribute value. Attributes starting in two
+    underscores will be ignored. This function is an alternative to vars() which only returns
+    instance variables, not properties. Note that methods are returned as well but the value in
+    the dictionary is the method, not the return value of the method.
+
+    >>> class Foo():
+    ...     def __init__(self):
+    ...         self.var = 1
+    ...     @property
+    ...     def prop(self):
+    ...         return self.var + 1
+    ...     def meth(self):
+    ...         return self.var + 2
+    >>> foo = Foo()
+    >>> properties( foo ) == { 'var':1, 'prop':2, 'meth':foo.meth }
+    True
+
+    Note how the entry for prop is not a bound method (i.e. the getter) but the return value of
+    that getter.
+    """
+    return dict( (attr, getattr( obj, attr ))
+                 for attr in dir( obj )
+                 if not attr.startswith( '__' ) )
+
+
+def ilen( it ):
+ """
+ Return the number of elements in an iterable
+
+ >>> ilen(xrange(0,100))
+ 100
+ """
+ return sum( 1 for _ in it )
+
+
+def rfc3339_datetime_re( anchor=True ):
+ """
+ Returns a regular expression for syntactic validation of ISO date-times, RFC-3339 date-times
+ to be precise.
+
+
+ >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39Z') )
+ True
+
+ >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39.123Z') )
+ True
+
+ >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39-08:00') )
+ True
+
+ >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39.123+11:00') )
+ True
+
+ It anchors the matching to the beginning and end of a string by default ...
+
+ >>> bool( rfc3339_datetime_re().search('bla 2013-11-06T15:56:39Z bla') )
+ False
+
+ ... but that can be changed:
+
+ >>> bool( rfc3339_datetime_re( anchor=False ).search('bla 2013-11-06T15:56:39Z bla') )
+ True
+
+ >>> bool( rfc3339_datetime_re( anchor=False ).match('2013-11-06T15:56:39Z bla') )
+ True
+
+ Keep in mind that re.match() always anchors at the beginning:
+
+ >>> bool( rfc3339_datetime_re( anchor=False ).match('bla 2013-11-06T15:56:39Z') )
+ False
+
+ It does not check whether the actual value is a semantically valid datetime:
+
+ >>> bool( rfc3339_datetime_re().match('9999-99-99T99:99:99.9-99:99') )
+ True
+
+ If the regular expression matches, each component of the matching value will be exposed as a
+ captured group in the match object.
+
+ >>> rfc3339_datetime_re().match('2013-11-06T15:56:39Z').groups()
+ ('2013', '11', '06', '15', '56', '39', None, 'Z')
+ >>> rfc3339_datetime_re().match('2013-11-06T15:56:39.123Z').groups()
+ ('2013', '11', '06', '15', '56', '39', '123', 'Z')
+ >>> rfc3339_datetime_re().match('2013-11-06T15:56:39.123-08:30').groups()
+ ('2013', '11', '06', '15', '56', '39', '123', '-08:30')
+ """
+ return re.compile(
+ ('^' if anchor else '') +
+ '(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?(Z|[+-]\d{2}:\d{2})' +
+ ('$' if anchor else '') )
+
+
+_rfc3339_datetime_re = rfc3339_datetime_re( )
+
+
+def parse_iso_utc( s ):
+    """
+    Parses an ISO time with a hard-coded Z for zulu-time (UTC) at the end. Other timezones are
+    not supported.
+
+    :param str s: the ISO-formatted time
+
+    :rtype: datetime.datetime
+
+    :return: a timezone-naive datetime object
+
+    >>> parse_iso_utc('2016-04-27T00:28:04.000Z')
+    datetime.datetime(2016, 4, 27, 0, 28, 4)
+    >>> parse_iso_utc('2016-04-27T00:28:04Z')
+    datetime.datetime(2016, 4, 27, 0, 28, 4)
+    >>> parse_iso_utc('2016-04-27T00:28:04X')
+    Traceback (most recent call last):
+    ...
+    ValueError: Not a valid ISO datetime in UTC: 2016-04-27T00:28:04X
+    """
+    m = _rfc3339_datetime_re.match( s )
+    if not m:
+        raise ValueError( 'Not a valid ISO datetime in UTC: ' + s )
+    else:
+        fmt = '%Y-%m-%dT%H:%M:%S' + ('.%f' if m.group( 7 ) else '') + 'Z'
+        return datetime.datetime.strptime( s, fmt )
+
+
+def strict_bool( s ):
+ """
+ Variant of bool() that only accepts two possible string values.
+ """
+ if s == 'True':
+ return True
+ elif s == 'False':
+ return False
+ else:
+ raise ValueError( s )
+
+
+def less_strict_bool( x ):
+ """
+ Idempotent and None-safe version of strict_bool.
+ """
+ if x is None:
+ return False
+ elif x is True or x is False:
+ return x
+ else:
+ return strict_bool( x )
diff --git a/src/bd2k/util/collections.py b/src/bd2k/util/collections.py
new file mode 100644
index 0000000..175d87d
--- /dev/null
+++ b/src/bd2k/util/collections.py
@@ -0,0 +1,162 @@
+from __future__ import absolute_import
+
+import collections
+from itertools import dropwhile
+
+
+class OrderedSet( collections.MutableSet ):
+    """
+    An ordered set from http://code.activestate.com/recipes/576694/
+
+    Note: May be leaky, may have O(N) lookup by index
+
+    TODO: Consider https://github.com/LuminosoInsight/ordered-set which uses a native Python list
+    instead of a linked list
+
+    >>> s = OrderedSet( 'abracadaba' )
+    >>> s
+    OrderedSet(['a', 'b', 'r', 'c', 'd'])
+    >>> t = OrderedSet( 'simsalabim' )
+    >>> t
+    OrderedSet(['s', 'i', 'm', 'a', 'l', 'b'])
+    >>> s | t
+    OrderedSet(['a', 'b', 'r', 'c', 'd', 's', 'i', 'm', 'l'])
+    >>> s & t
+    OrderedSet(['a', 'b'])
+    >>> s - t
+    OrderedSet(['r', 'c', 'd'])
+    >>> t - s
+    OrderedSet(['s', 'i', 'm', 'l'])
+    >>> OrderedSet( reversed( s ) )
+    OrderedSet(['d', 'c', 'r', 'b', 'a'])
+    >>> s.pop()
+    'd'
+    >>> s
+    OrderedSet(['a', 'b', 'r', 'c'])
+    >>> s.discard('b')
+    >>> s
+    OrderedSet(['a', 'r', 'c'])
+    >>> s.pop( last=False )
+    'a'
+    >>> s
+    OrderedSet(['r', 'c'])
+    >>> s.union( t )
+    >>> s
+    OrderedSet(['r', 'c', 's', 'i', 'm', 'a', 'l', 'b'])
+
+    >>> s = OrderedSet()
+    >>> s
+    OrderedSet()
+    >>> s.pop()
+    Traceback (most recent call last):
+    ...
+    KeyError: 'set is empty'
+    >>> OrderedSet( "aba" ) == OrderedSet( "ab" )
+    True
+    >>> OrderedSet( "aba" ) == OrderedSet( "abc" )
+    False
+    >>> OrderedSet( "aba" ) == OrderedSet( "ba" )
+    False
+    >>> OrderedSet( "aba" ) == set( "ba" )
+    True
+    """
+
+    def __init__( self, iterable=None ):
+        self.end = end = [ ]
+        end += [ None, end, end ]  # sentinel node for doubly linked list
+        self.map = { }  # key --> [key, prev, next]
+        if iterable is not None:
+            self |= iterable
+
+    def __len__( self ):
+        return len( self.map )
+
+    def __contains__( self, key ):
+        return key in self.map
+
+    def add( self, key ):
+        if key not in self.map:
+            end = self.end
+            curr = end[ 1 ]
+            curr[ 2 ] = end[ 1 ] = self.map[ key ] = [ key, curr, end ]
+
+    def discard( self, key ):
+        if key in self.map:
+            key, prev, next = self.map.pop( key )
+            prev[ 2 ] = next
+            next[ 1 ] = prev
+
+    def __iter__( self ):
+        end = self.end
+        curr = end[ 2 ]
+        while curr is not end:
+            yield curr[ 0 ]
+            curr = curr[ 2 ]
+
+    def __reversed__( self ):
+        end = self.end
+        curr = end[ 1 ]
+        while curr is not end:
+            yield curr[ 0 ]
+            curr = curr[ 1 ]
+
+    def pop( self, last=True ):
+        if not self:
+            raise KeyError( 'set is empty' )
+        key = self.end[ 1 ][ 0 ] if last else self.end[ 2 ][ 0 ]
+        self.discard( key )
+        return key
+
+    def __repr__( self ):
+        if not self:
+            return '%s()' % (self.__class__.__name__,)
+        return '%s(%r)' % (self.__class__.__name__, list( self ))
+
+    def __eq__( self, other ):
+        if isinstance( other, OrderedSet ):
+            return len( self ) == len( other ) and list( self ) == list( other )
+        return set( self ) == set( other )
+
+    def union(self,other):
+        self |= other
+
+
+def rindex( l, v ):
+ """
+ Like l.index(v) but finds last occurrence of value v in sequence l.
+
+ :type l: anything
+
+ >>> rindex( [0], 0 )
+ 0
+ >>> rindex( [0,0], 0 )
+ 1
+ >>> rindex( [0,1], 0 )
+ 0
+ >>> rindex( [0,1,0,1], 0 )
+ 2
+ >>> rindex( [0,1,0,1], 1 )
+ 3
+ >>> rindex( [0], 1 )
+ Traceback (most recent call last):
+ ...
+ ValueError: 1
+ >>> rindex( [None], None )
+ 0
+ >>> rindex( [], None )
+ Traceback (most recent call last):
+ ...
+ ValueError: None
+ >>> rindex( "0101", '0')
+ 2
+ >>> rindex( (0,1,0,1), 0 )
+ 2
+ >>> rindex( xrange(3), 2 )
+ 2
+ """
+ try:
+ n = next( dropwhile( lambda (i, x): v != x, enumerate( reversed( l ), 1 ) ) )[ 0 ]
+ except StopIteration:
+ raise ValueError( v )
+ else:
+ return len( l ) - n
diff --git a/src/bd2k/util/d32.py b/src/bd2k/util/d32.py
new file mode 100644
index 0000000..100eacb
--- /dev/null
+++ b/src/bd2k/util/d32.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2015 Hannes Schmidt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+# and associated documentation files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or
+# substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Inspired by Dominic Tarr's JavaScript at https://github.com/dominictarr/d64
+
+class D32( object ):
+ """
+ Base32 encoding and decoding without padding, and using an arbitrary alphabet.
+ """
+
+ def __init__( self, alphabet ):
+ super( D32, self ).__init__( )
+ self.alphabet = bytearray( alphabet )
+ self.lookup = bytearray( 255 )
+ for i in xrange( 32 ):
+ self.lookup[ self.alphabet[ i ] ] = i
+
+ def encode( self, d ):
+ """
+ >>> encode = standard.encode
+ >>> encode('')
+ ''
+ >>> encode('\\0')
+ '22'
+ >>> encode('\\xff')
+ 'zw'
+ >>> encode('\\0\\1\\2\\3\\4')
+ '222k62s6'
+ >>> encode('\\0\\1\\2\\3\\4\\5')
+ '222k62s62o'
+ """
+ m = len( d )
+ n = (m * 8 + 4) / 5
+ padding = 8 - n % 8
+ e = bytearray( n + padding )
+ i, j = 0, 0
+ a = self.alphabet
+
+ while i < m:
+ if m - i < 5:
+ g = bytearray( d[ i: ] + '\0' * (5 - (m - i)) )
+ else:
+ g = bytearray( d[ i:i + 5 ] )
+ # bit 1 2 3
+ # bit 01234567 89012345 67890123 45678901 23456789
+ # byte 00000000 11111111 22222222 33333333 44444444
+ # group 00000111 11222223 33334444 45555566 66677777
+ e[ j + 0 ] = a[ g[ 0 ] >> 3 ]
+ e[ j + 1 ] = a[ g[ 0 ] << 2 & 31 | g[ 1 ] >> 6 ]
+ e[ j + 2 ] = a[ g[ 1 ] >> 1 & 31 ]
+ e[ j + 3 ] = a[ g[ 1 ] << 4 & 31 | g[ 2 ] >> 4 ]
+ e[ j + 4 ] = a[ g[ 2 ] << 1 & 31 | g[ 3 ] >> 7 ]
+ e[ j + 5 ] = a[ g[ 3 ] >> 2 & 31 ]
+ e[ j + 6 ] = a[ g[ 3 ] << 3 & 31 | g[ 4 ] >> 5 ]
+ e[ j + 7 ] = a[ g[ 4 ] & 31 ]
+ j += 8
+ i += 5
+ return str( e[ :-padding ] )
+
+ def decode( self, e ):
+ """
+ >>> decode = standard.decode
+
+ # >>> decode('222k62s62o')
+ # '\\x00\\x01\\x02\\x03\\x04\\x05'
+ # >>> decode('222k62s6')
+ # '\\x00\\x01\\x02\\x03\\x04'
+ >>> decode('zw')
+ '\\xff'
+ """
+ n = len( e )
+ m = n * 5 / 8
+ padding = 5 - m % 5
+ d = bytearray( m + padding )
+ i, j = 0, 0
+ l = self.lookup
+ while j < n:
+ if n - j < 8:
+ g = [ l[ ord( x ) ] for x in e[ j: ] ] + [ 0 ] * (8 - (n - j))
+ else:
+ g = [ l[ ord( x ) ] for x in e[ j:j + 8 ] ]
+ # bit 1 2 3
+ # bit 01234567 89012345 67890123 45678901 23456789
+ # byte 00000000 11111111 22222222 33333333 44444444
+ # group 00000111 11222223 33334444 45555566 66677777
+ d[ i + 0 ] = g[ 0 ] << 3 & 255 | g[ 1 ] >> 2
+ d[ i + 1 ] = g[ 1 ] << 6 & 255 | g[ 2 ] << 1 & 255 | g[ 3 ] >> 4
+ d[ i + 2 ] = g[ 3 ] << 4 & 255 | g[ 4 ] >> 1
+ d[ i + 3 ] = g[ 4 ] << 7 & 255 | g[ 5 ] << 2 & 255 | g[ 6 ] >> 3
+ d[ i + 4 ] = g[ 6 ] << 5 & 255 | g[ 7 ]
+ j += 8
+ i += 5
+ return str( d[ :-padding ] )
+
+
+# A variant of Base64 that maintains the lexicographical ordering such that for any given list of
+# string l, map( decode, sorted( map( standard.encode, l ) ) == sorted( l )
+
+standard = D32( '234567abcdefghijklmnopqrstuvwxyz' )
+
+# A reimplementation of base64.b32encode and base64.b32encode, but faster and without padding:
+
+base32 = D32( 'abcdefghijklmnopqrstuvwxyz234567' )
diff --git a/src/bd2k/util/d64.py b/src/bd2k/util/d64.py
new file mode 100644
index 0000000..c77d967
--- /dev/null
+++ b/src/bd2k/util/d64.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2014 Dominic Tarr
+# Copyright (c) 2015 Hannes Schmidt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+# and associated documentation files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or
+# substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Ported from JS found at https://github.com/dominictarr/d64
+
+
+
+class D64( object ):
+ def __init__( self, special_chars ):
+ super( D64, self ).__init__( )
+ alphabet = 'PYFGCRLAOEUIDHTNSQJKXBMWVZpyfgcrlaoeuidhtnsqjkxbmwvz1234567890'
+ self.alphabet = bytearray( sorted( alphabet + special_chars ) )
+ self.lookup = bytearray( 255 )
+ for i in xrange( 64 ):
+ code = self.alphabet[ i ]
+ self.lookup[ code ] = i
+
+ def encode( self, data ):
+ """
+ >>> encode = standard.encode
+ >>> encode('')
+ ''
+ >>> encode('\\x00')
+ '..'
+ >>> encode('\\x00\\x01')
+ '..3'
+ >>> encode('\\x00\\x01\\x02')
+ '..31'
+ >>> encode('\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07')
+ '..31.kF40VR'
+ """
+ l = len( data )
+ s = bytearray( (l * 4 + 2) / 3 )
+ hang = 0
+ j = 0
+ a = self.alphabet
+ for i in xrange( l ):
+ v = ord( data[ i ] )
+ r = i % 3
+ if r == 0:
+ s[ j ] = a[ v >> 2 ]
+ j += 1
+ hang = (v & 3) << 4
+ elif r == 1:
+ s[ j ] = a[ hang | v >> 4 ]
+ j += 1
+ hang = (v & 0xf) << 2
+ elif r == 2:
+ s[ j ] = a[ hang | v >> 6 ]
+ j += 1
+ s[ j ] = a[ v & 0x3f ]
+ j += 1
+ hang = 0
+ else:
+ assert False
+ if l % 3:
+ s[ j ] = a[ hang ]
+
+ return str( s )
+
+ def decode( self, e ):
+ """
+ >>> decode = standard.decode
+ >>> decode('')
+ ''
+ >>> decode('..')
+ '\\x00'
+ >>> decode('..3')
+ '\\x00\\x01'
+ >>> decode('..31')
+ '\\x00\\x01\\x02'
+ >>> decode('..31.kF40VR')
+ '\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07'
+ """
+ n = len( e )
+ j = 0
+ b = bytearray( n * 3 / 4 )
+ hang = 0
+ l = self.lookup
+
+ for i in xrange( n ):
+ v = l[ ord( e[ i ] ) ]
+ r = i % 4
+ if r == 0:
+ hang = v << 2
+ elif r == 1:
+ b[ j ] = hang | v >> 4
+ j += 1
+ hang = (v << 4) & 0xFF
+ elif r == 2:
+ b[ j ] = hang | v >> 2
+ j += 1
+ hang = (v << 6) & 0xFF
+ elif r == 3:
+ b[ j ] = hang | v
+ j += 1
+ else:
+ assert False
+ return str( b )
+
+
+standard = D64( '._' )
diff --git a/src/bd2k/util/ec2/__init__.py b/src/bd2k/util/ec2/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/bd2k/util/ec2/credentials.py b/src/bd2k/util/ec2/credentials.py
new file mode 100644
index 0000000..58b0a5d
--- /dev/null
+++ b/src/bd2k/util/ec2/credentials.py
@@ -0,0 +1,155 @@
+import errno
+import logging
+import threading
+import time
+from datetime import datetime
+
+import os
+from bd2k.util.files import mkdir_p
+
+log = logging.getLogger( __name__ )
+
+cache_path = '~/.cache/aws/cached_temporary_credentials'
+
+datetime_format = "%Y-%m-%dT%H:%M:%SZ" # incidentally the same as the format used by AWS
+
+
+def datetime_to_str( dt ):
+ """
+ Convert a naive (implicitly UTC) datetime object into a string, explicitly UTC.
+
+ >>> datetime_to_str( datetime( 1970, 1, 1, 0, 0, 0 ) )
+ '1970-01-01T00:00:00Z'
+ """
+ return dt.strftime( datetime_format )
+
+
+def str_to_datetime( s ):
+ """
+ Convert a string, explicitly UTC into a naive (implicitly UTC) datetime object.
+
+ >>> str_to_datetime( '1970-01-01T00:00:00Z' )
+ datetime.datetime(1970, 1, 1, 0, 0)
+
+ Just to show that the constructor args for seconds and microseconds are optional:
+ >>> datetime(1970, 1, 1, 0, 0, 0)
+ datetime.datetime(1970, 1, 1, 0, 0)
+ """
+ return datetime.strptime( s, datetime_format )
+
+
+monkey_patch_lock = threading.RLock( )
+_populate_keys_from_metadata_server_orig = None
+
+
+def enable_metadata_credential_caching( ):
+ """
+ Monkey-patches Boto to allow multiple processes using it to share one set of cached, temporary
+ IAM role credentials. This helps avoid hitting request limits imposed on the metadata service
+ when too many processes concurrently request those credentials. Function is idempotent.
+
+ This function should be called before any AWS connections attempts are made with Boto.
+ """
+ global _populate_keys_from_metadata_server_orig
+ with monkey_patch_lock:
+ if _populate_keys_from_metadata_server_orig is None:
+ from boto.provider import Provider
+ _populate_keys_from_metadata_server_orig = Provider._populate_keys_from_metadata_server
+ Provider._populate_keys_from_metadata_server = _populate_keys_from_metadata_server
+
+
+def disable_metadata_credential_caching( ):
+ """
+ Reverse the effect of enable_metadata_credential_caching()
+ """
+ global _populate_keys_from_metadata_server_orig
+ with monkey_patch_lock:
+ if _populate_keys_from_metadata_server_orig is not None:
+ from boto.provider import Provider
+ Provider._populate_keys_from_metadata_server = _populate_keys_from_metadata_server_orig
+ _populate_keys_from_metadata_server_orig = None
+
+
+def _populate_keys_from_metadata_server( self ):
+    global _populate_keys_from_metadata_server_orig
+    path = os.path.expanduser( cache_path )
+    tmp_path = path + '.tmp'
+    while True:
+        log.debug( 'Attempting to read cached credentials from %s.', path )
+        try:
+            with open( path, 'r' ) as f:
+                content = f.read( )
+                if content:
+                    record = content.split( '\n' )
+                    assert len(record) == 4
+                    self._access_key = record[ 0 ]
+                    self._secret_key = record[ 1 ]
+                    self._security_token = record[ 2 ]
+                    self._credential_expiry_time = str_to_datetime( record[ 3 ] )
+                else:
+                    log.debug( '%s is empty. Credentials are not temporary.', path )
+                    return
+        except IOError as e:
+            if e.errno == errno.ENOENT:
+                log.debug( 'Cached credentials are missing.' )
+                dir_path = os.path.dirname( path )
+                if not os.path.exists( dir_path ):
+                    log.debug( 'Creating parent directory %s', dir_path )
+                    # A race would be ok at this point
+                    mkdir_p( dir_path )
+            else:
+                raise
+        else:
+            if self._credentials_need_refresh( ):
+                log.debug( 'Cached credentials are expired.' )
+            else:
+                log.debug( 'Cached credentials exist and are still fresh.' )
+                return
+        # We get here if credentials are missing or expired
+        log.debug( 'Racing to create %s.', tmp_path )
+        # Only one process, the winner, will succeed
+        try:
+            fd = os.open( tmp_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0600 )
+        except OSError as e:
+            if e.errno == errno.EEXIST:
+                log.debug( 'Lost the race to create %s. Waiting on winner to remove it.', tmp_path )
+                while os.path.exists( tmp_path ):
+                    time.sleep( .1 )
+                log.debug( 'Winner removed %s. Trying from the top.', tmp_path )
+            else:
+                raise
+        else:
+            try:
+                log.debug( 'Won the race to create %s. '
+                           'Requesting credentials from metadata service.', tmp_path )
+                _populate_keys_from_metadata_server_orig( self )
+            except:
+                os.close( fd )
+                fd = None
+                log.debug( 'Failed to obtain credentials, removing %s.', tmp_path )
+                # This unblocks the losers.
+                os.unlink( tmp_path )
+                # Bail out. It's too likely to happen repeatedly
+                raise
+            else:
+                if self._credential_expiry_time is None:
+                    os.close( fd )
+                    fd = None
+                    log.debug( 'Credentials are not temporary. '
+                               'Leaving %s empty and renaming it to %s.', tmp_path, path )
+                else:
+                    log.debug( 'Writing credentials to %s.', tmp_path )
+                    with os.fdopen( fd, 'w' ) as fh:
+                        fd = None
+                        fh.write( '\n'.join( [
+                            self._access_key,
+                            self._secret_key,
+                            self._security_token,
+                            datetime_to_str( self._credential_expiry_time ) ] ) )
+                    log.debug( 'Wrote credentials to %s. '
+                               'Renaming it to %s.', tmp_path, path )
+                os.rename( tmp_path, path )
+                return
+            finally:
+                if fd is not None:
+                    os.close( fd )
diff --git a/src/bd2k/util/ec2/test/__init__.py b/src/bd2k/util/ec2/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/bd2k/util/ec2/test/test_credentials.py b/src/bd2k/util/ec2/test/test_credentials.py
new file mode 100644
index 0000000..1fa713f
--- /dev/null
+++ b/src/bd2k/util/ec2/test/test_credentials.py
@@ -0,0 +1,83 @@
+import logging
+
+import errno
+
+import os
+import unittest
+
+from bd2k.util.ec2.credentials import (enable_metadata_credential_caching,
+ disable_metadata_credential_caching, cache_path)
+
+
+def get_access_key( ):
+ from boto.provider import Provider
+ provider = Provider( 'aws' )
+ return None if provider._credential_expiry_time is None else provider.get_access_key( )
+
+
+class CredentialsTest( unittest.TestCase ):
+ def __init__( self, *args, **kwargs ):
+ super( CredentialsTest, self ).__init__( *args, **kwargs )
+ self.cache_path = os.path.expanduser( cache_path )
+
+ @classmethod
+ def setUpClass( cls ):
+ super( CredentialsTest, cls ).setUpClass( )
+ logging.basicConfig( level=logging.DEBUG )
+
+ def setUp( self ):
+ super( CredentialsTest, self ).setUp( )
+ self.cleanUp( )
+
+ def cleanUp( self ):
+ try:
+ os.unlink( self.cache_path )
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ pass
+ else:
+ raise
+
+ def tearDown( self ):
+ super( CredentialsTest, self ).tearDown( )
+ self.cleanUp( )
+
+ def test_metadata_credential_caching( self ):
+ """
+ Brute forces many concurrent requests for getting temporary credentials. If you comment
+ out the calls to enable_metadata_credential_caching, you should see some failures due to
+ requests timing out. The test will also take much longer in that case.
+ """
+ num_tests = 1000
+ num_processes = 32
+ # Get key without caching
+ access_key = get_access_key( )
+ self.assertFalse( os.path.exists( self.cache_path ) )
+ enable_metadata_credential_caching( )
+ # Again for idempotence
+ enable_metadata_credential_caching( )
+ try:
+ futures = [ ]
+ from multiprocessing import Pool
+ pool = Pool( num_processes )
+ try:
+ for i in range( num_tests ):
+ futures.append( pool.apply_async( get_access_key ) )
+ except:
+ pool.close( )
+ pool.terminate( )
+ raise
+ else:
+ pool.close( )
+ pool.join( )
+ finally:
+ disable_metadata_credential_caching( )
+ # Again for idempotence
+ disable_metadata_credential_caching( )
+ self.assertEquals( access_key is not None, os.path.exists( self.cache_path ) )
+ self.assertEquals( len( futures ), num_tests )
+ access_keys = [ f.get( ) for f in futures ]
+ self.assertEquals( len( access_keys ), num_tests )
+ access_keys = set( access_keys )
+ self.assertEquals( len( access_keys ), 1 )
+ self.assertEquals( access_keys.pop( ), access_key )
diff --git a/src/bd2k/util/exceptions.py b/src/bd2k/util/exceptions.py
new file mode 100644
index 0000000..1a8885a
--- /dev/null
+++ b/src/bd2k/util/exceptions.py
@@ -0,0 +1,81 @@
+from contextlib import contextmanager
+import sys
+
+
+class panic( object ):
+ """
+ The Python idiom for reraising a primary exception fails when the except block raises a
+ secondary exception, e.g. while trying to cleanup. In that case the original exception is
+ lost and the secondary exception is reraised. The solution seems to be to save the primary
+ exception info as returned from sys.exc_info() and then reraise that.
+
+ This is a contextmanager that should be used like this
+
+ try:
+ # do something that can fail
+ except:
+ with panic( log ):
+ # do cleanup that can also fail
+
+ If a logging logger is passed to panic(), any secondary Exception raised within the with
+ block will be logged. Otherwise those exceptions are swallowed. At the end of the with block
+ the primary exception will be reraised.
+ """
+
+ def __init__( self, log=None ):
+ super( panic, self ).__init__( )
+ self.log = log
+ self.exc_info = None
+
+ def __enter__( self ):
+ self.exc_info = sys.exc_info( )
+
+ def __exit__( self, *exc_info ):
+ if self.log is not None and exc_info and exc_info[ 0 ]:
+ self.log.warn( "Exception during panic", exc_info=exc_info )
+ exc_type, exc_value, traceback = self.exc_info
+ raise exc_type, exc_value, traceback
+
+
+class RequirementError( Exception ):
+    """
+    The exception raised by require(). Where AssertionError is raised when there is likely an
+    internal problem within the code base, i.e. a bug, an instance of this class is raised when
+    the cause lies outside the code base, e.g. with the user or caller.
+    """
+    pass
+
+
+def require( value, message, *message_args ):
+    """
+    Raise RequirementError with the given message if the given value is considered false. See
+    https://docs.python.org/2/library/stdtypes.html#truth-value-testing for a definition of which
+    values are false. This function is commonly used for validating user input. It is meant to be
+    complementary to assert. See RequirementError for more on that.
+
+    :param Any value: the value to be tested
+    :param message:
+    :param message_args: optional values for % formatting the given message
+    :return:
+
+    >>> require(1 + 1 == 2, 'You made a terrible mistake')
+
+    >>> require(1 + 1 == 3, 'You made a terrible mistake')
+    Traceback (most recent call last):
+    ...
+    RequirementError: You made a terrible mistake
+
+    >>> require(1 + 1 == 3, 'You made a terrible mistake, %s', 'you fool')
+    Traceback (most recent call last):
+    ...
+    RequirementError: You made a terrible mistake, you fool
+
+    >>> require(1 + 1 == 3, 'You made a terrible mistake, %s %s', 'your', 'majesty')
+    Traceback (most recent call last):
+    ...
+    RequirementError: You made a terrible mistake, your majesty
+    """
+    if not value:
+        if message_args:
+            message = message % message_args
+        raise RequirementError( message)
diff --git a/src/bd2k/util/expando.py b/src/bd2k/util/expando.py
new file mode 100644
index 0000000..f741f2a
--- /dev/null
+++ b/src/bd2k/util/expando.py
@@ -0,0 +1,117 @@
class Expando(dict):
    """
    A dictionary whose entries double as attributes.

    Initial attributes can be passed to the constructor:

    >>> o = Expando(foo=42)
    >>> o.foo
    42

    New attributes can be created dynamically:

    >>> o.bar = 'hi'
    >>> o.bar
    'hi'

    An Expando is a plain dictionary, so it round-trips through JSON:

    >>> isinstance(o,dict)
    True
    >>> o['foo']
    42
    >>> import json
    >>> o = json.loads('{"foo":42}', object_hook=Expando)
    >>> o.foo
    42

    Attributes can be deleted, which removes the corresponding entry:

    >>> o = Expando(foo=42)
    >>> del o.foo
    >>> o.foo
    Traceback (most recent call last):
    ...
    AttributeError: 'Expando' object has no attribute 'foo'
    >>> o['foo']
    Traceback (most recent call last):
    ...
    KeyError: 'foo'

    Instances can be copied (shallowly), preserving the subclass:

    >>> o = Expando(foo=42)
    >>> p = o.copy()
    >>> isinstance(p,Expando)
    True
    >>> o == p
    True
    >>> o is p
    False
    """

    def __init__( self, *args, **kwargs ):
        super( Expando, self ).__init__( *args, **kwargs )
        # Neutralize any __slots__ a subclass may declare, then alias the
        # attribute dictionary to the instance itself so that item access and
        # attribute access are two views of the very same data.
        self.__slots__ = None
        self.__dict__ = self

    def copy( self ):
        # Go through type(self) so subclasses copy to their own type.
        return type( self )( self )
+
class MagicExpando(Expando):
    """
    An Expando supporting chained attribute access: reading a missing
    attribute creates it on the fly as a new child MagicExpando.

    >>> o=MagicExpando()
    >>> o.foo = 42
    >>> o.bar.hello = 'hi'
    >>> o['bar']['hello']
    'hi'
    """

    def __getattribute__( self, name ):
        # Deliberately starts the MRO walk after Expando (super( Expando, ... ))
        # so that ordinary attribute lookup is attempted first.
        try:
            return super( Expando, self ).__getattribute__( name )
        except AttributeError:
            # Missing attribute: auto-vivify a child of the same class and
            # remember it, enabling chains like o.a.b.c = 1.
            child = self.__class__( )
            self[ name ] = child
            return child
+
diff --git a/src/bd2k/util/files.py b/src/bd2k/util/files.py
new file mode 100644
index 0000000..da1ffd3
--- /dev/null
+++ b/src/bd2k/util/files.py
@@ -0,0 +1,106 @@
+import errno
+import os
+
+
def mkdir_p( path ):
    """
    Create the given directory like `mkdir -p`: intermediate directories are
    created as needed and an already existing directory is not an error.
    """
    try:
        os.makedirs( path )
    except OSError as exc:
        # Only tolerate EEXIST when the existing path really is a directory;
        # anything else (permissions, EEXIST on a regular file, ...) re-raises.
        if not ( exc.errno == errno.EEXIST and os.path.isdir( path ) ):
            raise
+
+
def rm_f( path ):
    """
    Remove the file at the given path with os.remove(), ignoring errors caused
    by the file's absence (like `rm -f`).
    """
    try:
        os.remove( path )
    except OSError as e:
        # ENOENT means the file was already gone, which is the desired state.
        if e.errno != errno.ENOENT:
            raise
+
+
def copyfileobj( src, dst, limit=None, bufsize=1024 * 1024 ):
    """
    Copy the contents of one file object to another file object. If limit is
    given, stop after at most limit bytes were copied. Copying begins at the
    current file pointer of each file object.

    :param src: the file object to copy from

    :param dst: the file object to copy to

    :param limit: the maximum number of bytes to copy, or None to copy all
           remaining bytes in src

    :param bufsize: the size of the intermediate copy buffer; no more than
           that many bytes are read from src or written to dst at a time

    :return: None if limit is None or was reached, otherwise the difference
             between limit and the number of bytes actually copied, i.e. > 0
             if and only if src hit EOF before limit bytes could be read
    """
    remaining = limit
    while remaining is None or remaining > 0:
        # Never request more than bufsize, nor more than is still allowed.
        if remaining is None or bufsize < remaining:
            request = bufsize
        else:
            request = remaining
        chunk = src.read( request )
        if not chunk:
            # EOF: report how much of the limit was left unconsumed.
            return remaining
        if remaining is not None:
            remaining -= len( chunk )
            assert remaining >= 0
        dst.write( chunk )
+
+
if False:
    # These are not needed for Python 2.7 since the built-in file object's
    # read() and write() methods are greedy. For Python 3.x they may be
    # useful, which is why they are kept here, disabled.

    def gread( readable, n ):
        """
        Greedy read: keep reading until the requested number of bytes has been
        read, an error occurs or the readable is exhausted. Returns fewer than
        the requested number of bytes if and only if end of file was reached.

        :type readable: io.FileIO
        """
        pieces = [ ]
        total = 0
        while total < n:
            piece = readable.read( n - total )
            if not piece:
                break
            pieces.append( piece )
            total += len( piece )
        return ''.join( pieces )


    def gwrite( writable, buf ):
        """
        Greedy write: keep writing until the entire buffer has been written or
        an error occurs.

        :type writable: io.FileIO[str|bytearray]

        :type buf: str|bytearray
        """
        written = 0
        while written < len( buf ):
            written += writable.write( buf[ written: ] )
diff --git a/src/bd2k/util/fnmatch.py b/src/bd2k/util/fnmatch.py
new file mode 100644
index 0000000..f6d335e
--- /dev/null
+++ b/src/bd2k/util/fnmatch.py
@@ -0,0 +1,149 @@
+# Same as Python's fnmatch with the following diferences:
+# - '/' doesn't match '*'
+# - added '**' to match anything
+# - added some unit tests
+
+"""Filename matching with shell patterns.
+
+fnmatch(FILENAME, PATTERN) matches according to the local convention.
+fnmatchcase(FILENAME, PATTERN) always takes case in account.
+
+The functions operate by translating the pattern into a regular
+expression. They cache the compiled regular expressions for speed.
+
+The function translate(PATTERN) returns a regular expression
+corresponding to PATTERN. (It does not compile it.)
+"""
+
+import re
+
__all__ = [ "filter", "fnmatch", "fnmatchcase", "translate" ]

# Cache of compiled regular expressions, keyed by the (normalized) pattern
# string. The cache is cleared wholesale once it holds _MAXCACHE entries.
_cache = { }
_MAXCACHE = 100
+
+
def _purge( ):
    """Clear the compiled-pattern cache."""
    _cache.clear( )
+
+
def fnmatch( name, pat ):
    """Test whether FILENAME matches PATTERN.

    Patterns are Unix shell style:

    *       matches everything except '/'
    **      matches everything, including '/'
    ?       matches any single character
    [seq]   matches any character in seq
    [!seq]  matches any char not in seq

    An initial period in FILENAME is not special.
    Both FILENAME and PATTERN are first case-normalized
    if the operating system requires it.
    If you don't want this, use fnmatchcase(FILENAME, PATTERN).

    >>> fnmatch('bar', '*' )
    True
    >>> fnmatch('foo/bar', '*' )
    False
    >>> fnmatch('foo/bar', '**' )
    True
    >>> fnmatch('foo/bar', '*/*' )
    True
    >>> fnmatch('/bar', '**/*' )
    True
    >>> fnmatch('/', '**' )
    True
    >>> fnmatch('/', '*' )
    False
    """
    import os

    # Case-normalize both arguments the way the local OS expects, then defer
    # to the case-sensitive variant.
    return fnmatchcase( os.path.normcase( name ), os.path.normcase( pat ) )
+
+
def filter( names, pat ):
    """Return the subset of the list NAMES that match PAT"""
    import os, posixpath

    pat = os.path.normcase( pat )
    if pat not in _cache:
        # Translate and compile the pattern, evicting everything when the
        # cache has grown too large.
        regex = translate( pat )
        if len( _cache ) >= _MAXCACHE:
            _cache.clear( )
        _cache[ pat ] = re.compile( regex )
    match = _cache[ pat ].match
    if os.path is posixpath:
        # normcase on posix is a no-op, so skip it inside the loop.
        return [ name for name in names if match( name ) ]
    else:
        return [ name for name in names if match( os.path.normcase( name ) ) ]
+
+
def fnmatchcase( name, pat ):
    """Test whether FILENAME matches PATTERN, including case.

    This is a version of fnmatch() which doesn't case-normalize
    its arguments.
    """
    if pat not in _cache:
        # Compile and cache the pattern, evicting everything once the cache
        # has grown too large.
        regex = translate( pat )
        if len( _cache ) >= _MAXCACHE:
            _cache.clear( )
        _cache[ pat ] = re.compile( regex )
    return _cache[ pat ].match( name ) is not None
+
+
def translate( pat ):
    """Translate a shell PATTERN to a regular expression.

    There is no way to quote meta-characters.

    '*' matches any run of characters except '/', '**' matches anything
    (including '/'), '?' matches any single character, and [seq] / [!seq]
    match character sets.
    """
    i, n = 0, len( pat )
    res = ''
    while i < n:
        c = pat[ i ]
        i += 1
        if c == '*':
            if i < n and pat[ i ] == '*':
                # '**' matches anything, including path separators
                i += 1
                res += '.*'
            else:
                # a single '*' does not cross '/'
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            j = i
            if j < n and pat[ j ] == '!':
                j += 1
            if j < n and pat[ j ] == ']':
                j += 1
            while j < n and pat[ j ] != ']':
                j += 1
            if j >= n:
                # Unterminated character set: treat '[' as a literal
                res += '\\['
            else:
                stuff = pat[ i:j ].replace( '\\', '\\\\' )
                i = j + 1
                if stuff[ 0 ] == '!':
                    stuff = '^' + stuff[ 1: ]
                elif stuff[ 0 ] == '^':
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        else:
            res = res + re.escape( c )
    # Bug fix: the inline flags used to be appended at the very end
    # ('\Z(?ms)'), a placement that Python 3.11+ rejects outright. Putting
    # them at the start yields an equivalent regular expression everywhere.
    return '(?ms)' + res + r'\Z'
diff --git a/src/bd2k/util/hashes.py b/src/bd2k/util/hashes.py
new file mode 100644
index 0000000..29e6aae
--- /dev/null
+++ b/src/bd2k/util/hashes.py
@@ -0,0 +1,132 @@
def hash_json( hash_obj, value ):
    """
    Compute the hash of a parsed JSON value using the given hash object. This function does not
    hash the JSON value, it hashes the object tree that is the result of parsing a string in JSON
    format. Hashables (JSON objects) are hashed entry by entry in order of the lexicographical
    ordering on the keys. Iterables are hashed in their inherent order.

    If value or any of its children is an iterable with non-deterministic ordering of its
    elements, e.g. a set, this method will yield non-deterministic results.

    :param hash_obj: one of the Hash objects in hashlib, or any other object that has an update(s)
    method accepting a single string.

    :type value: int|str|float|Iterable[type(obj)]|Hashable[str,type(obj)]
    :param value: The value to be hashed

    >>> import hashlib
    >>> def actual(x): h = hashlib.md5(); hash_json(h,x); return h.hexdigest()
    >>> def expect(s): h = hashlib.md5(); h.update(s); return h.hexdigest()

    >>> actual(0) == expect('0')
    True
    >>> actual(0.0) == expect('0.0')
    True
    >>> actual(0.1) == expect('0.1')
    True
    >>> actual(True) == expect('true')
    True
    >>> actual(False) == expect('false')
    True
    >>> actual("") == expect('""')
    True
    >>> actual([]) == expect('[]')
    True
    >>> actual([0]) == expect('[0]')
    True
    >>> actual([0,1]) == expect('[0,1]')
    True
    >>> actual({}) == expect('{}')
    True
    >>> actual({'':0}) == expect('{:0}')
    True
    >>> actual({'0':0}) == expect('{0:0}')
    True
    >>> actual({'0':0,'1':1}) == expect('{0:0,1:1}')
    True
    >>> actual({'':[]}) == expect('{:[]}')
    True
    >>> actual([{}]) == expect('[{}]')
    True
    >>> actual({0:0})
    Traceback (most recent call last):
    ...
    ValueError: Dictionary keys must be strings, not <type 'int'>
    >>> actual(object())
    Traceback (most recent call last):
    ...
    ValueError: Type <type 'object'> is not supported
    """
    # Duck-typed dispatch; the order of the checks below is significant.
    try:
        # Mappings first: anything exposing iteritems() is a JSON object.
        # NOTE(review): an AttributeError raised from *inside* a custom
        # iteritems() would also land here and misclassify the value.
        items = value.iteritems( )
    except AttributeError:
        # Must check for string before testing iterability since strings are iterable
        if isinstance( value, basestring ):
            _hash_string( hash_obj, value )
        else:
            try:
                iterator = iter( value )
            except TypeError:
                # We must check for bool first since it is subclass of int (wrongly, IMHO)
                if isinstance( value, bool ):
                    _hash_bool( hash_obj, value )
                elif isinstance( value, (int, float) ):
                    _hash_number( hash_obj, value )
                else:
                    raise ValueError( 'Type %s is not supported' % type( value ) )
            else:
                # Any other iterable hashes like a JSON array.
                _hash_iterable( hash_obj, iterator )
    else:
        # A mapping hashes like a JSON object, entries sorted by key.
        _hash_hashable( hash_obj, items )
+
+
def _hash_number( hash_obj, n ):
    """Feed an int or float to the hash as its str() form, e.g. 42 -> '42'."""
    hash_obj.update( '%s' % n )
+
+
def _hash_bool( hash_obj, b ):
    """Feed a boolean to the hash as its JSON spelling, 'true' or 'false'."""
    if b:
        hash_obj.update( 'true' )
    else:
        hash_obj.update( 'false' )
+
+
def _hash_string( hash_obj, s ):
    """Feed a string to the hash as a double-quoted JSON string (no escaping)."""
    for part in ( '"', s, '"' ):
        hash_obj.update( part )
+
+
def _hash_iterable( hash_obj, items ):
    """Feed an iterable to the hash as a JSON array: '[' e1 ',' e2 ... ']'."""
    hash_obj.update( '[' )
    for index, element in enumerate( items ):
        # Comma between elements, not after the last one.
        if index:
            hash_obj.update( ',' )
        hash_json( hash_obj, element )
    hash_obj.update( ']' )
+
+
def _hash_hashable( hash_obj, items ):
    """
    Feed a mapping's (key, value) pairs to the hash as a JSON object,
    entries sorted lexicographically by key: '{' k1:v1 ',' k2:v2 ... '}'.
    """
    hash_obj.update( '{' )
    for index, entry in enumerate( sorted( items ) ):
        # Comma between entries, not after the last one.
        if index:
            hash_obj.update( ',' )
        _hash_hashable_item( hash_obj, entry )
    hash_obj.update( '}' )
+
+
def _hash_hashable_item( hash_obj, item ):
    """
    Feed a single (key, value) entry of a JSON object to the hash as 'key:value'.

    :raises ValueError: if the key is not a string

    Note: this previously used Python 2-only tuple parameter unpacking in its
    signature, which PEP 3113 removed; the explicit unpacking below is
    equivalent and forward-compatible.
    """
    k, v = item
    if isinstance( k, basestring ):
        hash_obj.update( k )
        hash_obj.update( ':' )
        hash_json( hash_obj, v )
    else:
        raise ValueError( 'Dictionary keys must be strings, not %s' % type( k ) )
diff --git a/src/bd2k/util/humanize.py b/src/bd2k/util/humanize.py
new file mode 100644
index 0000000..32b8a3b
--- /dev/null
+++ b/src/bd2k/util/humanize.py
@@ -0,0 +1,131 @@
+# http://code.activestate.com/recipes/578019-bytes-to-human-human-to-bytes-converter/
+
+"""
+Bytes-to-human / human-to-bytes converter.
+Based on: http://goo.gl/kTQMs
+Working with Python 2.x and 3.x.
+
+Author: Giampaolo Rodola' <g.rodola [AT] gmail [DOT] com>
+License: MIT
+"""
+
+# see: http://goo.gl/kTQMs
# Unit prefix tables; within each table, index i (counting from 1) stands for
# a factor of 1 << (i * 10), i.e. 1024 ** i.
SYMBOLS = {
    'customary'     : ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'),
    'customary_ext' : ('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa',
                       'zetta', 'iotta'),
    'iec'           : ('Bi', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'),
    'iec_ext'       : ('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi',
                       'zebi', 'yobi'),
}

def bytes2human(n, fmt='%(value).1f %(symbol)s', symbols='customary'):
    """
    Convert n bytes into a human readable string based on format.
    symbols can be either "customary", "customary_ext", "iec" or "iec_ext",
    see: http://goo.gl/kTQMs

    :param n: the number of bytes; negative values are rejected
    :param fmt: a %%-format string that may reference 'value' and 'symbol'
    :param symbols: the name of one of the prefix tables in SYMBOLS
    :raises ValueError: if n is negative
    :raises KeyError: if symbols is not a known table name

    >>> bytes2human(0)
    '0.0 '
    >>> bytes2human(1024)
    '1.0 K'
    >>> bytes2human(1048576)
    '1.0 M'
    >>> bytes2human(9856, symbols="iec")
    '9.6 Ki'
    >>> bytes2human(9856, symbols="iec_ext")
    '9.6 kibi'
    >>> bytes2human(10000, "%(value).1f %(symbol)s/sec")
    '9.8 K/sec'
    >>> bytes2human(10000, fmt="%(value).5f %(symbol)s")
    '9.76562 K'
    """
    n = int(n)
    if n < 0:
        raise ValueError("n < 0")
    symbols = SYMBOLS[symbols]
    # Map each prefix to its factor; the largest prefix that fits wins.
    prefix = {}
    for i, s in enumerate(symbols[1:]):
        prefix[s] = 1 << (i + 1) * 10
    for symbol in reversed(symbols[1:]):
        if n >= prefix[symbol]:
            # 'value' and 'symbol' are picked up by fmt via locals().
            value = float(n) / prefix[symbol]
            return fmt % locals()
    # Less than 1024 bytes: no prefix at all.
    return fmt % dict(symbol=symbols[0], value=n)

def human2bytes(s):
    """
    Attempts to guess the string format based on default symbols
    set and return the corresponding bytes as an integer.
    When unable to recognize the format ValueError is raised.

    >>> human2bytes('0 ')
    0
    >>> human2bytes('1 K')
    1024
    >>> human2bytes('1.1K') == 1024 + 102
    True
    >>> human2bytes('1 Gi')
    1073741824
    >>> human2bytes('0.5kilo')
    512
    >>> human2bytes('1 k') # k is an alias for K
    1024
    >>> human2bytes('12 foo')
    Traceback (most recent call last):
    ...
    ValueError: can't interpret '12 foo'
    """
    init = s
    # Consume the leading numeric part (digits and decimal point).
    num = ""
    while s and s[0:1].isdigit() or s[0:1] == '.':
        num += s[0]
        s = s[1:]
    try:
        num = float(num)
    except ValueError:
        # Bug fix: inputs without a parsable leading number (e.g. 'K' or
        # '1.2.3K') used to escape as a confusing float() error; report them
        # like any other unrecognized input.
        raise ValueError("can't interpret %r" % init)
    letter = s.strip()
    for name, sset in SYMBOLS.items():
        if letter in sset:
            break
    else:
        if letter == 'k':
            # treat 'k' as an alias for 'K' as per: http://goo.gl/kTQMs
            sset = SYMBOLS['customary']
            letter = letter.upper()
        else:
            raise ValueError("can't interpret %r" % init)
    # Map each prefix of the matching table to its factor; the bare unit
    # (index 0) is a factor of one.
    prefix = {sset[0]: 1}
    for i, unit in enumerate(sset[1:]):
        prefix[unit] = 1 << (i + 1) * 10
    return int(num * prefix[letter])
+
+
if __name__ == "__main__":
    # Run this module's doctests when executed directly.
    import doctest
    doctest.testmod()
diff --git a/src/bd2k/util/iterables.py b/src/bd2k/util/iterables.py
new file mode 100644
index 0000000..9235753
--- /dev/null
+++ b/src/bd2k/util/iterables.py
@@ -0,0 +1,169 @@
+from itertools import takewhile, izip, izip_longest, dropwhile, imap, chain
+
+
def common_prefix( xs, ys ):
    """
    Yield the leading elements that the two given iterables have in common.

    >>> list( common_prefix('','') )
    []
    >>> list( common_prefix('A','') )
    []
    >>> list( common_prefix('','A') )
    []
    >>> list( common_prefix('A','A') )
    ['A']
    >>> list( common_prefix('AB','A') )
    ['A']
    >>> list( common_prefix('A','AB') )
    ['A']
    >>> list( common_prefix('A','B') )
    []
    """
    # Pair up elements, take pairs while they agree and keep the element from
    # xs. NOTE: Python 2 only -- izip/imap and the tuple-unpacking lambdas
    # below were removed in Python 3 (PEP 3113).
    return imap( lambda (x, y): x, takewhile( lambda (a, b): a == b, izip( xs, ys ) ) )
+
+
def disparate_suffix( xs, ys ):
    """
    Yield the trailing pairs of elements on which the two given iterables
    disagree, padding the shorter one with None.

    >>> list( disparate_suffix('','') )
    []
    >>> list( disparate_suffix('A','') )
    [('A', None)]
    >>> list( disparate_suffix('','A') )
    [(None, 'A')]
    >>> list( disparate_suffix('A','A') )
    []
    >>> list( disparate_suffix('AB','A') )
    [('B', None)]
    >>> list( disparate_suffix('A','AB') )
    [(None, 'B')]
    >>> list( disparate_suffix('A','B') )
    [('A', 'B')]
    """
    # Drop pairs while they agree, then yield the rest. NOTE: Python 2 only --
    # izip_longest and the tuple-unpacking lambda were removed in Python 3.
    return dropwhile( lambda (a, b): a == b, izip_longest( xs, ys ) )
+
+
def flatten( iterables ):
    # Lazily concatenate an iterable of iterables into one iterable,
    # flattening exactly one level.
    return chain.from_iterable( iterables )
+
+
+# noinspection PyPep8Naming
# noinspection PyPep8Naming
class concat( object ):
    """
    A literal iterable that lets you mix scalars, sequence literals and
    generators without array construction or concatenation. Instead of

    >>> [ -1 ] + [ x * 2 for x in range( 3 ) ] + [ -1 ]
    [-1, 0, 2, 4, -1]

    you can write

    >>> list( concat( -1, ( x * 2 for x in range( 3 ) ), -1 ) )
    [-1, 0, 2, 4, -1]

    Every iterable argument is flattened (chained) into the result, but only
    one level deep -- see crush() for recursive flattening. To protect an
    iterable argument from being flattened, wrap it in a single-argument
    concat():

    >>> list( concat() )
    []
    >>> list( concat( 1 ) )
    [1]
    >>> list( concat( 1, [2], 3 ) )
    [1, 2, 3]
    >>> list( concat( 1, concat( [2] ), 3 ) )
    [1, [2], 3]
    >>> list( concat( 1, concat( [2], 3 ), 4 ) ) # protection needs exactly one argument
    [1, 2, 3, 4]
    >>> list( concat( 1, 2, concat( 3, 4 ), 5, 6 ) )
    [1, 2, 3, 4, 5, 6]

    Note that while strings are technically iterable, concat() does not
    flatten them (str has no __iter__ method in Python 2):

    >>> list( concat( 'ab' ) )
    ['ab']
    """

    def __init__( self, *args ):
        super( concat, self ).__init__( )
        self.args = args

    def __iter__( self ):
        def units( arg ):
            # A concat() with exactly one argument shields that argument from
            # being flattened.
            if isinstance( arg, concat ) and len( arg.args ) == 1:
                return arg.args
            try:
                # Using __iter__() instead of iter() prevents breaking up of
                # strings.
                return arg.__iter__( )
            except AttributeError:
                # Not iterable: contribute the argument itself.
                return ( arg, )

        return chain.from_iterable( units( arg ) for arg in self.args )
+
+
+# noinspection PyPep8Naming
# noinspection PyPep8Naming
class crush( object ):
    """
    Recursively flatten a nested structure of iterables into one lazy
    iterable.

    >>> list(crush([]))
    []
    >>> list(crush([[]]))
    []
    >>> list(crush([[1]]))
    [1]
    >>> list(crush([1,(),['two'],([3, 4],),{5}]))
    [1, 'two', 3, 4, 5]

    The argument itself must be iterable:

    >>> list(crush(1))
    Traceback (most recent call last):
    ...
    TypeError: 'int' object is not iterable

    Strings contained in iterables are kept whole (str has no __iter__ method
    in Python 2), although a string passed as the top-level argument is still
    broken into characters:

    >>> list(crush(['123']))
    ['123']
    """

    def __init__( self, iterables ):
        super( crush, self ).__init__( )
        self.iterables = iterables

    def __iter__( self ):
        def expanded( element ):
            try:
                # Using __iter__() instead of iter() prevents breaking up of strings
                return crush( element.__iter__( ) )
            except AttributeError:
                # Leaf value: contribute it as-is.
                return ( element, )

        return chain.from_iterable( expanded( e ) for e in self.iterables )
diff --git a/src/bd2k/util/lockfile.py b/src/bd2k/util/lockfile.py
new file mode 100644
index 0000000..deebb9d
--- /dev/null
+++ b/src/bd2k/util/lockfile.py
@@ -0,0 +1,36 @@
+from __future__ import absolute_import
+
+import errno
+import logging as log
+import os
+
try:
    from lockfile.pidlockfile import PIDLockFile
except ImportError:
    # The 'lockfile' package is an optional dependency; without it this module
    # simply doesn't define SmartPIDLockFile. (Bug fix: this used to be a bare
    # 'except:' which would also have masked unrelated errors raised while
    # importing lockfile.)
    pass
else:
    class SmartPIDLockFile( PIDLockFile ):
        """
        A PID lock file that breaks the lock if the owning process doesn't exist
        """

        def process_alive( self, pid ):
            """
            Return True if a process with the given PID exists, False if it
            positively doesn't, or None if that can't be determined (e.g. for
            lack of permission to signal the process).
            """
            try:
                os.kill( pid, 0 )
                # now we know the process exists
                return True
            except OSError as e:
                if e.errno == errno.ESRCH:
                    # now we know the process doesn't exist
                    return False
                else:
                    # now we're not sure
                    return None

        def acquire( self, timeout=None ):
            # Before delegating to the base class, break the lock if its owner
            # is positively dead (never for merely "unsure").
            owner = self.read_pid( )
            if owner is not None and owner != os.getpid( ) and self.process_alive( owner ) is False:
                # logging.warning supersedes the deprecated logging.warn
                log.warning( "Breaking lock '%s' since owning process %i is dead."
                             % (self.lock_file, owner) )
                self.break_lock( )
            PIDLockFile.acquire( self, timeout )
diff --git a/src/bd2k/util/logging.py b/src/bd2k/util/logging.py
new file mode 100644
index 0000000..93fec58
--- /dev/null
+++ b/src/bd2k/util/logging.py
@@ -0,0 +1,29 @@
+from __future__ import absolute_import
+
+import codecs
+import types
+import logging
+
+
class Utf8SyslogFormatter( logging.Formatter ):
    """
    Works around http://bugs.python.org/issue14452 by encoding unicode log
    messages as UTF-8 prefixed with a BOM, unless they are representable as
    plain ASCII.
    """

    def format( self, record ):
        origGetMessage = record.getMessage

        def getMessage( _self ):
            msg = origGetMessage( )
            if isinstance( msg, unicode ):
                try:
                    # First check if we can represent the message as ASCII without losing
                    # information. That way we can avoid writing the BOM unless absolutely
                    # necessary.
                    msg = msg.encode( 'ascii' )
                except UnicodeEncodeError:
                    msg = codecs.BOM + msg.encode( 'utf8' )
            return msg

        # Patch the record so the base class formats the encoded message.
        # Bug fix: an identical types.MethodType(...) expression used to be
        # evaluated once more right before this line with its result
        # discarded; only this bound assignment is needed.
        record.getMessage = types.MethodType( getMessage, record, logging.LogRecord )
        return logging.Formatter.format( self, record )
diff --git a/src/bd2k/util/objects.py b/src/bd2k/util/objects.py
new file mode 100644
index 0000000..4f44e7d
--- /dev/null
+++ b/src/bd2k/util/objects.py
@@ -0,0 +1,217 @@
+from __future__ import absolute_import
+from bd2k.util import sync_memoize
+
+
class abstractclassmethod( classmethod ):
    """
    A decorator combining the effects of @abc.abstractmethod and @classmethod,
    for Python 2.7 where the two cannot simply be stacked.

    Shamelessly stolen from

    http://stackoverflow.com/questions/11217878/python-2-7-combine-abc-abstractmethod-and-classmethod

    >>> from abc import ABCMeta

    >>> class DemoABC:
    ...     __metaclass__ = ABCMeta
    ...
    ...     @abstractclassmethod
    ...     def from_int(cls, n):
    ...         return cls()

    >>> class DemoConcrete(DemoABC):
    ...     @classmethod
    ...     def from_int(cls, n):
    ...         return cls(2*n)
    ...
    ...     def __init__(self, n):
    ...         print ('Initializing with %s' % n)

    >>> d = DemoConcrete(5) # Succeeds by calling a concrete __init__()
    Initializing with 5

    >>> d = DemoConcrete.from_int(5) # Succeeds by calling a concrete from_int()
    Initializing with 10

    >>> DemoABC() # Fails because from_int() is abstract
    Traceback (most recent call last):
    ...
    TypeError: Can't instantiate abstract class DemoABC with abstract methods from_int
    """
    # Marking the descriptor itself abstract makes ABCMeta refuse to
    # instantiate classes that don't override the decorated method.
    __isabstractmethod__ = True

    def __init__( self, method ):
        method.__isabstractmethod__ = True
        super( abstractclassmethod, self ).__init__( method )
+
+
class abstractstaticmethod( staticmethod ):
    """
    A decorator combining the effects of @abc.abstractmethod and
    @staticmethod, for Python 2.7 where the two cannot simply be stacked.

    Based on code found at

    http://stackoverflow.com/questions/11217878/python-2-7-combine-abc-abstractmethod-and-classmethod

    >>> from abc import ABCMeta

    >>> class DemoABC:
    ...     __metaclass__ = ABCMeta
    ...
    ...     @abstractstaticmethod
    ...     def f(n):
    ...         raise NotImplementedError()

    >>> class DemoConcrete(DemoABC):
    ...     @staticmethod
    ...     def f(n):
    ...         return (2*n)

    >>> d = DemoABC.f(5) # Fails because f() is not implemented
    Traceback (most recent call last):
    ...
    NotImplementedError

    >>> DemoConcrete.f(5) # Succeeds by calling a concrete f()
    10
    """
    # Marking the descriptor itself abstract makes ABCMeta refuse to
    # instantiate classes that don't override the decorated method.
    __isabstractmethod__ = True

    def __init__( self, method ):
        method.__isabstractmethod__ = True
        super( abstractstaticmethod, self ).__init__( method )
+
+
class InnerClass( object ):
    """
    Note that this is EXPERIMENTAL code.

    A nested class (the inner class) decorated with this will have an additional attribute called
    'outer' referencing the instance of the nesting class (the outer class) that was used to
    create the inner class. The outer instance does not need to be passed to the inner class's
    constructor, it will be set magically. Shamelessly stolen from

    http://stackoverflow.com/questions/2278426/inner-classes-how-can-i-get-the-outer-class-object-at-construction-time#answer-2278595.

    with names made more descriptive (I hope) and added caching of the BoundInner classes.

    Caveat: Within the inner class, self.__class__ will not be the inner class but a dynamically
    created subclass thereof. Its name will be the same as that of the inner class,
    but its __module__ will be different. There will be one such dynamic subclass per inner class
    and instance of outer class, if that outer class instance created any instances of the inner
    class.

    >>> class Outer(object):
    ...     def new_inner(self):
    ...         # self is an instance of the outer class
    ...         inner = self.Inner()
    ...         # the inner instance's 'outer' attribute is set to the outer instance
    ...         assert inner.outer is self
    ...         return inner
    ...     @InnerClass
    ...     class Inner(object):
    ...         def get_outer(self):
    ...             return self.outer
    ...         @classmethod
    ...         def new_inner(cls):
    ...             return cls()
    >>> o = Outer()
    >>> i = o.new_inner()
    >>> i # doctest: +ELLIPSIS
    <bd2k.util.objects.Inner object at ...> bound to <bd2k.util.objects.Outer object at ...>

    >>> i.get_outer() # doctest: +ELLIPSIS
    <bd2k.util.objects.Outer object at ...>

    Now with inheritance for both inner and outer:

    >>> class DerivedOuter(Outer):
    ...     def new_inner(self):
    ...         return self.DerivedInner()
    ...     @InnerClass
    ...     class DerivedInner(Outer.Inner):
    ...         def get_outer(self):
    ...             assert super( DerivedOuter.DerivedInner, self ).get_outer() == self.outer
    ...             return self.outer
    >>> derived_outer = DerivedOuter()
    >>> derived_inner = derived_outer.new_inner()
    >>> derived_inner # doctest: +ELLIPSIS
    <bd2k.util.objects.DerivedInner object at ...> bound to <bd2k.util.objects.DerivedOuter object at ...>

    >>> derived_inner.get_outer() # doctest: +ELLIPSIS
    <bd2k.util.objects.DerivedOuter object at ...>

    Test static references:
    >>> Outer.Inner
    <class 'bd2k.util.objects.Inner'>
    >>> DerivedOuter.Inner
    <class 'bd2k.util.objects.Inner'>
    >>> DerivedOuter.DerivedInner
    <class 'bd2k.util.objects.DerivedInner'>

    Can't decorate top-level classes. Unfortunately, this is detected when the instance is
    created, not when the class is defined.
    >>> @InnerClass
    ... class Foo(object):
    ...     pass
    >>> Foo()
    Traceback (most recent call last):
    ...
    RuntimeError: Inner classes must be nested in another class.

    All inner instances should refer to a single outer instance:
    >>> o = Outer()
    >>> o.new_inner().outer == o == o.new_inner().outer
    True

    All inner instances should be of the same class ...
    >>> o.new_inner().__class__ == o.new_inner().__class__
    True

    ... but that class isn't the inner class ...
    >>> o.new_inner().__class__ != Outer.Inner
    True

    ... but a subclass of the inner class.
    >>> isinstance( o.new_inner(), Outer.Inner )
    True

    Static and class methods, e.g. should work, too

    >>> o.Inner.new_inner().outer == o
    True
    """

    def __init__( self, inner_class ):
        super( InnerClass, self ).__init__( )
        # The decorated (inner) class; this decorator instance acts as a
        # descriptor in the outer class's namespace.
        self.inner_class = inner_class

    # noinspection PyUnusedLocal
    def __get__( self, instance, owner ):
        # No need to wrap a static reference, i.e one that is made via 'Outer.' rather than 'self.'
        if instance is None:
            return self.inner_class
        else:
            return self._bind( instance )

    @sync_memoize
    def _bind( self, _outer ):
        # Memoized per (self, _outer): each outer instance gets exactly one
        # dynamic subclass of the inner class, carrying the outer reference
        # as a class attribute.
        class BoundInner( self.inner_class ):
            outer = _outer

            def __repr__( self ):
                return "%s bound to %s" % (super( BoundInner, self ).__repr__( ), repr( _outer ))

        # Masquerade as the inner class for nicer reprs and pickling-ish uses.
        BoundInner.__name__ = self.inner_class.__name__
        BoundInner.__module__ = self.inner_class.__module__
        return BoundInner

    def __call__( *args, **kwargs ):
        # Only reached when the decorated class was not nested inside another
        # class, i.e. the descriptor protocol (__get__) never replaced this
        # decorator with the bound class.
        raise RuntimeError( "Inner classes must be nested in another class." )
diff --git a/src/bd2k/util/processes.py b/src/bd2k/util/processes.py
new file mode 100644
index 0000000..a80732a
--- /dev/null
+++ b/src/bd2k/util/processes.py
@@ -0,0 +1,35 @@
+import os
+
+
def which( name, path=None ):
    """
    Yield the full path of every executable file of the given name found in
    the given directories, or in the directories listed in the PATH variable
    of the current environment. Roughly the equivalent of the `which` program.
    Does not work on Windows.

    :type name: str
    :param name: the name of the program

    :type path: Iterable
    :param path: the directory paths to consider or None if the directories
           referenced in the PATH environment variable should be used instead

    :returns: an iterator yielding the full path to every occurrence of an
              executable file of the given name in a directory on the given
              path, or on PATH if no path was passed

    >>> list( which('ls', path=()) )
    []
    """
    if path is None:
        # Fall back to the environment; yield nothing if PATH is unset.
        env_path = os.environ.get( 'PATH' )
        if env_path is None:
            return
        path = env_path.split( os.pathsep )
    for directory in path:
        candidate = os.path.join( directory, name )
        if os.access( candidate, os.X_OK ):
            yield candidate
diff --git a/src/bd2k/util/retry.py b/src/bd2k/util/retry.py
new file mode 100644
index 0000000..031b338
--- /dev/null
+++ b/src/bd2k/util/retry.py
@@ -0,0 +1,138 @@
+from __future__ import absolute_import
+
+import time
+import urllib2
+from contextlib import contextmanager
+
+import logging
+
+log = logging.getLogger( __name__ )
+
+
# noinspection PyUnusedLocal
def never( exception ):
    """
    The default retry predicate: refuse to retry, no matter which exception occurred.

    :param Exception exception: the exception that was raised (ignored)
    :rtype: bool
    """
    return False
+
+
def retry( delays=(0, 1, 1, 4, 16, 64), timeout=300, predicate=never ):
    """
    Retry an operation while the failure matches a given predicate and until a given timeout
    expires, waiting a given amount of time in between attempts. This function is a generator
    that yields contextmanagers. See doctests below for example usage.

    :param Iterable[float] delays: an iterable yielding the time in seconds to wait before each
    retried attempt, the last element of the iterable will be repeated.

    :param float timeout: an overall timeout that should not be exceeded for all attempts
    together. This is a best-effort mechanism only and it won't abort an ongoing attempt, even
    if the timeout expires during that attempt.

    :param Callable[[Exception],bool] predicate: a unary callable returning True if another
    attempt should be made to recover from the given exception. The default value for this
    parameter will prevent any retries!

    :return: a generator yielding context managers, one per attempt
    :rtype: Iterator

    Retry for a limited amount of time:

    >>> true = lambda _:True
    >>> false = lambda _:False
    >>> i = 0
    >>> for attempt in retry( delays=[0], timeout=.1, predicate=true ):
    ...     with attempt:
    ...         i += 1
    ...         raise RuntimeError('foo')
    Traceback (most recent call last):
    ...
    RuntimeError: foo
    >>> i > 1
    True

    If timeout is 0, do exactly one attempt:

    >>> i = 0
    >>> for attempt in retry( timeout=0 ):
    ...     with attempt:
    ...         i += 1
    ...         raise RuntimeError( 'foo' )
    Traceback (most recent call last):
    ...
    RuntimeError: foo
    >>> i
    1

    Don't retry on success:

    >>> i = 0
    >>> for attempt in retry( delays=[0], timeout=.1, predicate=true ):
    ...     with attempt:
    ...         i += 1
    >>> i
    1

    Don't retry unless the predicate returns True:

    >>> i = 0
    >>> for attempt in retry( delays=[0], timeout=.1, predicate=false):
    ...     with attempt:
    ...         i += 1
    ...         raise RuntimeError( 'foo' )
    Traceback (most recent call last):
    ...
    RuntimeError: foo
    >>> i
    1
    """
    if timeout > 0:
        # Sentinel list: emptied by the first successful attempt, which ends the
        # while loop below and stops the generator.
        go = [ None ]

        @contextmanager
        def repeated_attempt( delay ):
            try:
                yield
            except Exception as e:
                # Retry only if a) there is still budget for the upcoming delay
                # before the deadline and b) the caller's predicate approves.
                if time.time( ) + delay < expiration and predicate( e ):
                    log.info( 'Got %s, trying again in %is.', e, delay )
                    time.sleep( delay )
                else:
                    # Deadline exceeded or predicate declined: propagate to the caller.
                    raise
            else:
                # Success: signal the loop below to stop yielding attempts.
                go.pop( )

        delays = iter( delays )
        expiration = time.time( ) + timeout
        delay = next( delays )
        while go:
            yield repeated_attempt( delay )
            # Once the delays iterable is exhausted, keep repeating its last value.
            delay = next( delays, delay )
    else:
        # A non-positive timeout means exactly one attempt, with no retry machinery.
        @contextmanager
        def single_attempt( ):
            yield

        yield single_attempt( )
+
+
# Defaults shared by retry() and retry_http().
default_delays = (0, 1, 1, 4, 16, 64)
default_timeout = 300
+
+
def retryable_http_error( e ):
    """
    Return True if the given exception is an HTTP error that is typically transient and
    therefore worth retrying: 503 (Service Unavailable), 408 (Request Timeout) or
    500 (Internal Server Error).

    :param Exception e: the exception to examine
    :rtype: bool
    """
    # HTTPError instances raised by urllib2 itself carry an *integer* status code, while the
    # doctest in retry_http() constructs HTTPError with a string code. Coerce to int so both
    # forms are recognized; the original comparison against the string tuple ('503', '408',
    # '500') never matched a genuine urllib2 error.
    return isinstance( e, urllib2.HTTPError ) and int( e.code ) in (503, 408, 500)
+
+
def retry_http( delays=default_delays, timeout=default_timeout, predicate=retryable_http_error ):
    """
    A convenience wrapper around retry() whose predicate defaults to retrying
    transient HTTP errors (see retryable_http_error).

    >>> i = 0
    >>> for attempt in retry_http(timeout=5):
    ...     with attempt:
    ...         i += 1
    ...         raise urllib2.HTTPError('http://www.test.com', '408', 'some message', {}, None)
    Traceback (most recent call last):
    ...
    HTTPError: HTTP Error 408: some message
    >>> i > 1
    True
    """
    return retry( delays=delays, timeout=timeout, predicate=predicate )
diff --git a/src/bd2k/util/shell.py b/src/bd2k/util/shell.py
new file mode 100644
index 0000000..562b677
--- /dev/null
+++ b/src/bd2k/util/shell.py
@@ -0,0 +1,28 @@
+import re
+
+
_find_unsafe = re.compile( r'[^\w@%+=:,./-]' ).search


def _quote(s):
    """
    Return a shell-escaped version of the string *s*.

    Stolen from Python 3's shlex module
    """
    if not s:
        return "''"
    if _find_unsafe( s ) is None:
        return s
    # use single quotes, and put single quotes into double quotes
    # the string $'b is then quoted as '$'"'"'b'
    return "'" + s.replace( "'", "'\"'\"'" ) + "'"


def quote(s, level=1):
    """
    Shell-escape the string *s*, applying the escaping *level* times. A level of 0
    returns the string unchanged.
    """
    result = s
    for _ in range( level ):
        result = _quote( result )
    return result
+
+
diff --git a/src/bd2k/util/strings.py b/src/bd2k/util/strings.py
new file mode 100644
index 0000000..563004f
--- /dev/null
+++ b/src/bd2k/util/strings.py
@@ -0,0 +1,129 @@
+# coding=utf-8
+
+import inspect
+
+
def to_english( iterable, separator=", ", conjunction=' and ', empty='empty',
                wrapper=None, pair_conjunction=None):
    """
    Convert an iterable to a string containing an enumeration in plain English.

    :param iterable: an iterable of strings or objects that can be cast to a string

    :param separator: the text to insert between elements

    :param conjunction: the text used to connect the final element

    :param empty: the text to be used to represent an empty iterable

    :param wrapper: the text to surround each element with, or None for no wrapping

    :param pair_conjunction: the conjunction to use between elements if there are exactly two of
    them, defaults to conjunction

    >>> to_english( [], empty='nada' )
    'nada'
    >>> to_english( [ 1 ] )
    '1'
    >>> to_english( [ 1, 2 ], conjunction=' or ' )
    '1 or 2'
    >>> to_english( [ 1, 2, 3 ], conjunction=' or ')
    '1, 2 or 3'
    >>> to_english( [ 1, 2, 3 ], separator='; ', conjunction=' or ')
    '1; 2 or 3'
    >>> to_english( [ 1, 2, 3 ], conjunction=', and ', pair_conjunction=' and ' )
    '1, 2, and 3'
    >>> to_english( [ 1, 2 ], conjunction=', and ', pair_conjunction=' and ' )
    '1 and 2'
    >>> to_english( [ 1 ], conjunction=', and ', pair_conjunction=' and ' )
    '1'
    """
    i = iter( iterable )
    try:
        # Use the builtin next() instead of the Python-2-only i.next() method so this
        # function also works on Python 3 (next() exists since Python 2.6).
        x = next( i )
    except StopIteration:
        return empty
    r = [ ]
    while True:
        x = str( x )
        if wrapper is not None:
            x = wrapper + x + wrapper
        try:
            # Look ahead one element so we know whether x is the last one.
            n = next( i )
        except StopIteration:
            # x is the final element: join it with the appropriate conjunction.
            # len(r) > 2 means at least two elements were already emitted (element plus
            # separator per iteration), so the regular conjunction applies; len(r) == 1
            # means exactly one earlier element, i.e. a pair.
            if len(r) > 2:
                r.append( conjunction )
            elif len(r) > 0:
                r.append( conjunction if pair_conjunction is None else pair_conjunction )
            r.append( x )
            break
        else:
            if r: r.append( separator )
            r.append( x )
            x = n
    return ''.join( r )
+
+
def interpolate( template, skip_frames=0, **kwargs ):
    """
    Interpolate {...} placeholders in the given template string with the given values or the
    local variables in the calling scope. The syntax of the format string is the same as for
    the built-in string format function. Explicitly passed keyword arguments take precedence
    over local variables which take precedence over global variables.

    Unlike with Python scoping rules, only the variables in a single frame are examined.

    :param str template: the format string to interpolate
    :param int skip_frames: how many additional stack frames to skip when locating the
        caller whose locals/globals are used
    :param kwargs: values that override same-named variables of the caller

    Example usage:

    >>> x = 1
    >>> interpolate( "{x}" )
    '1'
    >>> interpolate( "{x}", x=2 )
    '2'
    >>> interpolate( "{x} {y}", y=2 )
    '1 2'

    Use

    from bd2k.util.strings import interpolate as fmt

    to import this function under a shortened alias.
    """
    # Delegate to the shared helper; see comment on __interpolate for why.
    return __interpolate( template, skip_frames, kwargs )
+
+
def interpolate_dict( template, dictionary, skip_frames=0 ):
    """
    Equivalent to

    interpolate( template, skip_frames, **dictionary )

    :param str template: the format string to interpolate
    :param dict dictionary: values that override same-named variables of the caller
    :param int skip_frames: how many additional stack frames to skip when locating the caller

    Example usage:

    >>> x = 1
    >>> interpolate_dict( "{x}", {} )
    '1'
    >>> interpolate_dict( "{x}", dict(x=2) )
    '2'
    >>> interpolate_dict( "{x} {y}", dict(y=2) )
    '1 2'
    """
    # Delegate to the shared helper; see comment on __interpolate for why.
    return __interpolate( template, skip_frames, dictionary )
+
+
+# This is a separate function such that the depth to the client stack frame is the same for
+# interpolate() and interpolate_dict()
+
+def __interpolate( template, skip_frames, dictionary ):
+ frame = inspect.currentframe( )
+ for i in xrange( skip_frames + 2 ):
+ prev_frame = frame
+ frame = frame.f_back
+ del prev_frame
+ try:
+ env = frame.f_globals.copy( )
+ env.update( frame.f_locals )
+ env.update( dictionary )
+ finally:
+ del frame
+ return template.format( **env )
diff --git a/src/bd2k/util/test/__init__.py b/src/bd2k/util/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/bd2k/util/test/test_d32.py b/src/bd2k/util/test/test_d32.py
new file mode 100644
index 0000000..aaf9711
--- /dev/null
+++ b/src/bd2k/util/test/test_d32.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2014 Dominic Tarr
+# Copyright (c) 2015 Hannes Schmidt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+# and associated documentation files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or
+# substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Inspired by JavaScript code found at https://github.com/dominictarr/d64
+
+from __future__ import absolute_import
+from unittest import TestCase
+from bd2k.util.d32 import standard as d32
+import os
+
+
class TestD32( TestCase ):
    def test( self ):
        # Round-trip property test over random binary strings of lengths 0..999:
        # encode followed by decode must be lossless, and the d32 encoding must
        # preserve the byte-wise sort order (hence sorting the encoded forms and
        # comparing the decoded result against the sorted originals).
        l = [ os.urandom( i ) for i in xrange( 1000 ) ]
        self.assertEqual( map( d32.decode, sorted( map( d32.encode, l ) ) ), sorted( l ) )
diff --git a/src/bd2k/util/test/test_d64.py b/src/bd2k/util/test/test_d64.py
new file mode 100644
index 0000000..efdbcc1
--- /dev/null
+++ b/src/bd2k/util/test/test_d64.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2014 Dominic Tarr
+# Copyright (c) 2015 Hannes Schmidt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+# and associated documentation files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or
+# substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Ported from JS found at https://github.com/dominictarr/d64
+
+from __future__ import absolute_import
+from unittest import TestCase
+from bd2k.util.d64 import standard as d64
+import os
+
+
class TestD64( TestCase ):
    def test( self ):
        # Round-trip property test over random binary strings of lengths 0..999:
        # encode followed by decode must be lossless, and the d64 encoding must
        # preserve the byte-wise sort order (hence sorting the encoded forms and
        # comparing the decoded result against the sorted originals).
        l = [ os.urandom( i ) for i in xrange( 1000 ) ]
        self.assertEqual( map( d64.decode, sorted( map( d64.encode, l ) ) ), sorted( l ) )
diff --git a/src/bd2k/util/test/test_files.py b/src/bd2k/util/test/test_files.py
new file mode 100644
index 0000000..c693c16
--- /dev/null
+++ b/src/bd2k/util/test/test_files.py
@@ -0,0 +1,32 @@
+from unittest import TestCase
+
+from mock import MagicMock, call
+
+
class TestFiles( TestCase ):
    # The whole test body is disabled via 'if False:'. See the comment in the module
    # under test for the rationale; the import below would also run at class-definition
    # time if the guard were removed.
    if False:
        from bd2k.util.files import gread, gwrite
        # See comment in module under test
        def test_gread( self ):
            # gread(f, n) must keep calling f.read() until n bytes were collected or EOF.
            for n in range( 0, 4 ):
                f = MagicMock( )
                # The mock file contains "12". Each read() invocation shall return one byte from that,
                # followed by the empty string for EOF.
                f.read.side_effect = [ '1', '2', '' ]
                # Read n bytes greedily
                # noinspection PyTypeChecker
                self.assertEqual( self.gread( f, n ), "12"[ :n ] )
                # First call to read() should request n bytes and then one less on each subsequent call.
                self.assertEqual( f.mock_calls, [ call.read( i ) for i in range( n, 0, -1 ) ] )

        def test_gwrite( self ):
            # gwrite(f, s) must keep calling f.write() until all of s was written.
            for n in range( 0, 3 ):
                f = MagicMock( )
                # Each write invocation shall write a single byte.
                f.write.side_effect = [ 1 ] * n
                s = "12"[ :n ]
                # noinspection PyTypeChecker
                self.gwrite( f, s )
                # The first call to write() should be passed the entire string, minus one byte off
                # the front for each subsequent call.
                self.assertEqual( f.mock_calls, [ call.write( s[ i: ] ) for i in range( 0, n ) ] )
diff --git a/src/bd2k/util/test/test_panic.py b/src/bd2k/util/test/test_panic.py
new file mode 100644
index 0000000..1737875
--- /dev/null
+++ b/src/bd2k/util/test/test_panic.py
@@ -0,0 +1,80 @@
+import inspect
+import logging
+import unittest
+import sys
+
+from bd2k.util.exceptions import panic
+
+log = logging.getLogger( __name__ )
+logging.basicConfig( )
+
+
class TestPanic( unittest.TestCase ):
    """
    Verifies that bd2k.util.exceptions.panic re-raises the *primary* exception (the one
    active when the panic block was entered) even if a *secondary* exception is raised
    inside the panic block, and that the original traceback is preserved.
    """

    def test_panic_by_hand( self ):
        try:
            self.try_and_panic_by_hand( )
        except:
            self.__assert_raised_exception_is_primary( )

    def test_panic( self ):
        try:
            self.try_and_panic( )
        except:
            self.__assert_raised_exception_is_primary( )

    def test_panic_with_secondary( self ):
        try:
            self.try_and_panic_with_secondary( )
        except:
            self.__assert_raised_exception_is_primary( )

    def test_nested_panic( self ):
        try:
            self.try_and_nested_panic_with_secondary( )
        except:
            self.__assert_raised_exception_is_primary( )

    def try_and_panic_by_hand( self ):
        # Reference behavior, written out manually: capture exc_info, let a secondary
        # exception come and go, then re-raise the primary with its original traceback
        # (Python 2 three-expression raise form).
        try:
            # Record the line number of the raise below so the traceback can be checked.
            self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1
            raise ValueError( "primary" )
        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info( )
            try:
                raise RuntimeError( "secondary" )
            except Exception:
                pass
            raise exc_type, exc_value, exc_traceback

    def try_and_panic( self ):
        # panic() with an empty body must re-raise the primary exception.
        try:
            self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1
            raise ValueError( "primary" )
        except:
            with panic( log ):
                pass

    def try_and_panic_with_secondary( self ):
        # A secondary exception inside the panic block must not mask the primary one.
        try:
            self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1
            raise ValueError( "primary" )
        except:
            with panic( log ):
                raise RuntimeError( "secondary" )

    def try_and_nested_panic_with_secondary( self ):
        # Nested panic blocks must still surface the primary exception.
        try:
            self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1
            raise ValueError( "primary" )
        except:
            with panic( log ):
                with panic( log ):
                    raise RuntimeError( "secondary" )

    def __assert_raised_exception_is_primary( self ):
        # The current exception must be the primary ValueError, and the innermost frame
        # of its traceback must point at the line where it was originally raised.
        exc_type, exc_value, exc_traceback = sys.exc_info( )
        self.assertEquals( exc_type, ValueError )
        self.assertEquals( exc_value.message, "primary" )
        while exc_traceback.tb_next is not None:
            exc_traceback = exc_traceback.tb_next
        self.assertEquals( exc_traceback.tb_lineno, self.line_of_primary_exc )
diff --git a/src/bd2k/util/test/test_strings.py b/src/bd2k/util/test/test_strings.py
new file mode 100644
index 0000000..acc4d10
--- /dev/null
+++ b/src/bd2k/util/test/test_strings.py
@@ -0,0 +1,13 @@
+import unittest
+
+from bd2k.util.strings import interpolate
+from bd2k.util.strings import to_english
+
+foo = 4
+bar = 1
+
+
class TestStrings( unittest.TestCase ):
    def test_interpolate( self ):
        # interpolate() resolves {foo} from this module's globals (foo == 4) and {bar}
        # from the caller's locals, which take precedence over the global bar.
        bar = 2  # should override the global bar
        self.assertEquals( interpolate( "{foo}{bar}" ), "42" )
diff --git a/src/bd2k/util/threading.py b/src/bd2k/util/threading.py
new file mode 100644
index 0000000..da69067
--- /dev/null
+++ b/src/bd2k/util/threading.py
@@ -0,0 +1,81 @@
+from __future__ import absolute_import
+import sys
+import threading
+
+
class BoundedEmptySemaphore( threading._BoundedSemaphore ):
    """
    A bounded semaphore that is initially empty.
    """

    def __init__( self, value=1, verbose=None ):
        # _BoundedSemaphore starts out full (counter == value); drain it completely so
        # the first acquire() blocks until someone calls release().
        super( BoundedEmptySemaphore, self ).__init__( value, verbose )
        for i in xrange( value ):
            # Must always succeed since the semaphore was just created full.
            assert self.acquire( blocking=False )
+
+
class ExceptionalThread( threading.Thread ):
    """
    A thread whose join() method re-raises exceptions raised during run(). While join() is
    idempotent, the exception is only re-raised during the first invocation of join() that
    successfully joined the thread. If join() times out, no exception will be re-raised even
    though an exception might already have occurred in run().

    When subclassing this thread, override tryRun() instead of run().

    >>> def f():
    ...     assert 0
    >>> t = ExceptionalThread(target=f)
    >>> t.start()
    >>> t.join()
    Traceback (most recent call last):
    ...
    AssertionError

    >>> class MyThread(ExceptionalThread):
    ...     def tryRun( self ):
    ...         assert 0
    >>> t = MyThread()
    >>> t.start()
    >>> t.join()
    Traceback (most recent call last):
    ...
    AssertionError

    """

    # Holds sys.exc_info() of an exception raised in run(), or None if none occurred yet.
    exc_info = None

    def run( self ):
        try:
            self.tryRun( )
        except:
            # Remember the exception for join() to re-raise, then let the normal
            # Thread machinery report it as well.
            self.exc_info = sys.exc_info( )
            raise

    def tryRun( self ):
        # Default implementation: run the target passed to the constructor.
        super( ExceptionalThread, self ).run( )

    def join( self, *args, **kwargs ):
        super( ExceptionalThread, self ).join( *args, **kwargs )
        # Only re-raise if the join actually completed (thread no longer alive), and
        # clear exc_info first so subsequent join() calls don't re-raise again.
        if not self.is_alive( ) and self.exc_info is not None:
            type, value, traceback = self.exc_info
            self.exc_info = None
            # Python 2 three-expression raise: preserves the original traceback.
            raise type, value, traceback
+
+
+# noinspection PyPep8Naming
+class defaultlocal( threading.local ):
+ """
+ Thread local storage with default values for each field in each thread
+
+ >>> l = defaultlocal( foo=42 )
+ >>> def f(): print l.foo
+ >>> t = threading.Thread(target=f)
+ >>> t.start() ; t.join()
+ 42
+ """
+
+ def __init__( self, **kwargs ):
+ super( defaultlocal, self ).__init__( )
+ self.__dict__.update( kwargs )
diff --git a/src/bd2k/util/throttle.py b/src/bd2k/util/throttle.py
new file mode 100644
index 0000000..a69165f
--- /dev/null
+++ b/src/bd2k/util/throttle.py
@@ -0,0 +1,203 @@
+from __future__ import absolute_import
+
+import time
+import threading
+
+from bd2k.util.threading import BoundedEmptySemaphore
+
+
class GlobalThrottle:
    """
    A thread-safe rate limiter that throttles all threads globally. This should be used to
    regulate access to a global resource. It can be used as a function/method decorator or as a
    simple object, using the throttle() method. The token generation starts with the first call
    to throttle() or the decorated function. Each subsequent call to throttle() will then acquire
    a token, possibly having to wait until one becomes available. The number of unused tokens
    will not exceed a limit given at construction time. This is a very basic mechanism to
    prevent the resource from becoming swamped after longer pauses.
    """

    def __init__( self, min_interval, max_unused ):
        # min_interval: seconds between token releases; max_unused: token accumulation cap.
        self.min_interval = min_interval
        self.semaphore = BoundedEmptySemaphore( max_unused )
        self.thread_start_lock = threading.Lock( )
        self.thread_started = False
        # Daemon thread so a lingering token generator never blocks interpreter shutdown.
        self.thread = threading.Thread( target=self.generator )
        self.thread.daemon = True

    def generator( self ):
        # Background loop: release one token per interval, ignoring the ValueError that
        # the bounded semaphore raises when the token cap is already reached.
        while True:
            try:
                self.semaphore.release( )
            except ValueError:
                pass
            time.sleep( self.min_interval )

    def throttle( self, wait=True ):
        """
        If the wait parameter is True, this method returns True after suspending the current
        thread as necessary to ensure that no less than the configured minimum interval passed
        since the most recent time an invocation of this method returned True in any thread.

        If the wait parameter is False, this method immediately returns True if at least the
        configured minimum interval has passed since the most recent time this method returned
        True in any thread, or False otherwise.
        """
        # I think there is a race in Thread.start(), hence the lock
        with self.thread_start_lock:
            if not self.thread_started:
                self.thread.start( )
                self.thread_started = True
        return self.semaphore.acquire( blocking=wait )

    def __call__( self, function ):
        # Decorator form: throttle (blocking) before every invocation of the wrapped function.
        def wrapper( *args, **kwargs ):
            self.throttle( )
            return function( *args, **kwargs )

        return wrapper
+
+
class LocalThrottle:
    """
    A thread-safe rate limiter that throttles each thread independently. Can be used as a
    function or method decorator or as a simple object, via its .throttle() method.

    The use as a decorator is deprecated in favor of throttle().
    """

    def __init__( self, min_interval ):
        """
        Initialize this local throttle.

        :param min_interval: The minimum interval in seconds between invocations of the throttle
        method or, if this throttle is used as a decorator, invocations of the decorated method.
        """
        self.min_interval = min_interval
        self.per_thread = threading.local( )

    def throttle( self, wait=True ):
        """
        If the wait parameter is True, this method returns True after suspending the current
        thread as necessary to ensure that no less than the configured minimum interval has
        passed since the last invocation of this method in the current thread returned True.

        If the wait parameter is False, this method immediately returns True (if at least the
        configured minimum interval has passed since the last time this method returned True in
        the current thread) or False otherwise.
        """
        now = time.time( )
        # Read with a default: attributes on a threading.local instance only exist in
        # threads that have set them. The original code assigned last_invocation = None in
        # __init__, which initialized the *constructing* thread only and raised
        # AttributeError for every other thread calling throttle().
        last_invocation = getattr( self.per_thread, 'last_invocation', None )
        if last_invocation is not None:
            interval = now - last_invocation
            if interval < self.min_interval:
                if wait:
                    remainder = self.min_interval - interval
                    time.sleep( remainder )
                else:
                    return False
        self.per_thread.last_invocation = now
        return True

    def __call__( self, function ):
        # Decorator form: throttle (blocking) before every invocation of the wrapped function.
        def wrapper( *args, **kwargs ):
            self.throttle( )
            return function( *args, **kwargs )

        return wrapper
+
+
class throttle( object ):
    """
    A context manager that pads the execution time of its body to a given minimum
    duration, sleeping after the body completes if it finished early. If the body takes
    longer than the minimum on its own, or raises an exception, no extra sleep happens.
    Also usable as a decorator, in which case it is a simpler version of LocalThrottle.

    >>> start = time.time()
    >>> with throttle(1):
    ...     pass
    >>> 1 <= time.time() - start <= 1.1
    True

    >>> @throttle(1)
    ... def f():
    ...     pass
    >>> start = time.time()
    >>> f()
    >>> 1 <= time.time() - start <= 1.1
    True
    """

    def __init__( self, min_interval ):
        self.min_interval = min_interval

    def __enter__( self ):
        # Remember when the body started so __exit__ can compute the shortfall.
        self.start = time.time( )

    def __exit__( self, exc_type, exc_val, exc_tb ):
        # An exception in the body propagates immediately, without padding.
        if exc_type is not None:
            return
        shortfall = self.min_interval - (time.time( ) - self.start)
        if shortfall > 0:
            time.sleep( shortfall )

    def __call__( self, function ):
        # Decorator form: wrap each invocation of the function in this context manager.
        def wrapper( *args, **kwargs ):
            with self:
                return function( *args, **kwargs )
        return wrapper
diff --git a/src/bd2k/util/xml/__init__.py b/src/bd2k/util/xml/__init__.py
new file mode 100644
index 0000000..f34c55b
--- /dev/null
+++ b/src/bd2k/util/xml/__init__.py
@@ -0,0 +1 @@
+__author__ = 'hannes'
diff --git a/src/bd2k/util/xml/builder.py b/src/bd2k/util/xml/builder.py
new file mode 100644
index 0000000..a5ad6cb
--- /dev/null
+++ b/src/bd2k/util/xml/builder.py
@@ -0,0 +1,236 @@
+# This is a port from lxml.builder.E which itself was inspired by an idea by the creator of
+# ElementTree (http://effbot.org/zone/element-builder.htm). Support for namespaces was removed.
+#
+# -- Hannes
+
+#
+# Element generator factory by Fredrik Lundh.
+#
+# Source:
+# http://online.effbot.org/2006_11_01_archive.htm#et-builder
+# http://effbot.python-hosting.com/file/stuff/sandbox/elementlib/builder.py
+#
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+#
+# Copyright (c) 1999-2004 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+from __future__ import absolute_import
+
+"""
+The ``E`` Element factory for generating XML documents.
+"""
+
+import xml.etree.ElementTree as ET
+
# Compatibility shims so the module runs on a wide range of Python versions.

try:
    from functools import partial
except ImportError:
    # fake it for pre-2.5 releases
    def partial(func, tag):
        return lambda *args, **kwargs: func(tag, *args, **kwargs)

try:
    callable
except NameError:
    # Python 3 (early 3.x removed the callable() builtin)
    def callable(f):
        return hasattr(f, '__call__')

try:
    basestring
except NameError:
    # Python 3: str is the only text type
    basestring = str

try:
    unicode
except NameError:
    # Python 3: str is already unicode
    unicode = str
+
+
class ElementMaker(object):
    """Element generator factory.

    Unlike the ordinary Element factory, the E factory allows you to pass in
    more than just a tag and some optional attributes; you can also pass in
    text and other elements. The text is added as either text or tail
    attributes, and elements are inserted at the right spot. Some small
    examples::

        >>> import xml.etree.ElementTree as ET

        >>> ET.tostring(E("tag"))
        '<tag />'
        >>> ET.tostring(E("tag", "text"))
        '<tag>text</tag>'
        >>> ET.tostring(E("tag", "text", key="value"))
        '<tag key="value">text</tag>'
        >>> ET.tostring(E("tag", E("subtag", "text"), "tail"))
        '<tag><subtag>text</subtag>tail</tag>'

    For simple tags, the factory also allows you to write ``E.tag(...)`` instead
    of ``E('tag', ...)``::

        >>> ET.tostring(E.tag())
        '<tag />'
        >>> ET.tostring(E.tag("text"))
        '<tag>text</tag>'
        >>> ET.tostring(E.tag(E.subtag("text"), "tail"))
        '<tag><subtag>text</subtag>tail</tag>'

    Here's a somewhat larger example; this shows how to generate HTML
    documents, using a mix of prepared factory functions for inline elements,
    nested ``E.tag`` calls, and embedded XHTML fragments::

        # some common inline elements
        A = E.a
        I = E.i
        B = E.b

        def CLASS(v):
            # helper function, 'class' is a reserved word
            return {'class': v}

        page = (
            E.html(
                E.head(
                    E.title("This is a sample document")
                ),
                E.body(
                    E.h1("Hello!", CLASS("title")),
                    E.p("This is a paragraph with ", B("bold"), " text in it!"),
                    E.p("This is another paragraph, with a ",
                        A("link", href="http://www.python.org"), "."),
                    E.p("Here are some reserved characters: <spam&egg>."),
                    ET.XML("<p>And finally, here is an embedded XHTML fragment.</p>"),
                )
            )
        )

        print ET.tostring(page)

    Here's a prettyprinted version of the output from the above script::

        <html>
            <head>
                <title>This is a sample document</title>
            </head>
            <body>
                <h1 class="title">Hello!</h1>
                <p>This is a paragraph with <b>bold</b> text in it!</p>
                <p>This is another paragraph, with <a href="http://www.python.org">link</a>.</p>
                <p>Here are some reserved characters: <spam&egg>.</p>
                <p>And finally, here is an embedded XHTML fragment.</p>
            </body>
        </html>
    """

    def __init__(self, typemap=None,
                 namespace=None, makeelement=None):
        # Optional namespace is stored in Clark notation ('{uri}') and prepended to
        # every tag that is not already namespace-qualified.
        if namespace is not None:
            self._namespace = '{' + namespace + '}'
        else:
            self._namespace = None

        # Allow callers to supply their own element factory (e.g. for custom Element
        # subclasses); default to the standard ElementTree factory.
        if makeelement is not None:
            assert callable(makeelement)
            self._makeelement = makeelement
        else:
            self._makeelement = ET.Element

        # initialize type map for this element factory

        if typemap:
            typemap = typemap.copy()
        else:
            typemap = {}

        def add_text(elem, item):
            # Text after a child element becomes that child's tail; text before any
            # children becomes the element's own text.
            try:
                elem[-1].tail = (elem[-1].tail or "") + item
            except IndexError:
                elem.text = (elem.text or "") + item

        def add_cdata(elem, cdata):
            if elem.text:
                raise ValueError("Can't add a CDATA section. Element already has some text: %r" % elem.text)
            elem.text = cdata

        if str not in typemap:
            typemap[str] = add_text
        if unicode not in typemap:
            typemap[unicode] = add_text
        # if ET.CDATA not in typemap:
        #     typemap[ET.CDATA] = add_cdata

        def add_dict(elem, item):
            # Dict children are merged into the element's attributes; non-string values
            # are converted through the typemap first.
            attrib = elem.attrib
            for k, v in item.items():
                if isinstance(v, basestring):
                    attrib[k] = v
                else:
                    attrib[k] = typemap[type(v)](None, v)
        if dict not in typemap:
            typemap[dict] = add_dict

        self._typemap = typemap

    def __call__(self, tag, *children, **attrib):
        get = self._typemap.get

        if self._namespace is not None and tag[0] != '{':
            tag = self._namespace + tag
        elem = self._makeelement(tag)
        if attrib:
            # Keyword arguments become attributes, via the dict handler.
            get(dict)(elem, attrib)

        for item in children:
            # Callables are invoked first; their result is then dispatched by type.
            if callable(item):
                item = item()
            t = get(type(item))
            if t is None:
                # No exact-type handler: Elements are appended directly, everything
                # else falls back to a handler registered for one of its base types.
                if ET.iselement(item):
                    elem.append(item)
                    continue
                for basetype in type(item).__mro__:
                    # See if the typemap knows of any of this type's bases.
                    t = get(basetype)
                    if t is not None:
                        break
                else:
                    raise TypeError("bad argument type: %s(%r)" %
                                    (type(item).__name__, item))
            # A handler may return a value (e.g. a string) that is dispatched again.
            v = t(elem, item)
            if v:
                get(type(v))(elem, v)

        return elem

    def __getattr__(self, tag):
        # E.tag(...) is sugar for E('tag', ...).
        return partial(self, tag)

# create factory object
E = ElementMaker()
diff --git a/src/bd2k_python_lib.egg-info/PKG-INFO b/src/bd2k_python_lib.egg-info/PKG-INFO
new file mode 100644
index 0000000..5786e6e
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: bd2k-python-lib
+Version: 1.14a1.dev37
+Summary: The BD2K Python module kitchen sink
+Home-page: https://github.com/BD2KGenomics/bd2k-python-lib
+Author: Hannes Schmidt
+Author-email: hannes at ucsc.edu
+License: UNKNOWN
+Description: UNKNOWN
+Platform: UNKNOWN
diff --git a/src/bd2k_python_lib.egg-info/SOURCES.txt b/src/bd2k_python_lib.egg-info/SOURCES.txt
new file mode 100644
index 0000000..6bfe1f1
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/SOURCES.txt
@@ -0,0 +1,41 @@
+setup.cfg
+setup.py
+src/bd2k/__init__.py
+src/bd2k/util/__init__.py
+src/bd2k/util/collections.py
+src/bd2k/util/d32.py
+src/bd2k/util/d64.py
+src/bd2k/util/exceptions.py
+src/bd2k/util/expando.py
+src/bd2k/util/files.py
+src/bd2k/util/fnmatch.py
+src/bd2k/util/hashes.py
+src/bd2k/util/humanize.py
+src/bd2k/util/iterables.py
+src/bd2k/util/lockfile.py
+src/bd2k/util/logging.py
+src/bd2k/util/objects.py
+src/bd2k/util/processes.py
+src/bd2k/util/retry.py
+src/bd2k/util/shell.py
+src/bd2k/util/strings.py
+src/bd2k/util/threading.py
+src/bd2k/util/throttle.py
+src/bd2k/util/ec2/__init__.py
+src/bd2k/util/ec2/credentials.py
+src/bd2k/util/ec2/test/__init__.py
+src/bd2k/util/ec2/test/test_credentials.py
+src/bd2k/util/test/__init__.py
+src/bd2k/util/test/test_d32.py
+src/bd2k/util/test/test_d64.py
+src/bd2k/util/test/test_files.py
+src/bd2k/util/test/test_panic.py
+src/bd2k/util/test/test_strings.py
+src/bd2k/util/xml/__init__.py
+src/bd2k/util/xml/builder.py
+src/bd2k_python_lib.egg-info/PKG-INFO
+src/bd2k_python_lib.egg-info/SOURCES.txt
+src/bd2k_python_lib.egg-info/dependency_links.txt
+src/bd2k_python_lib.egg-info/namespace_packages.txt
+src/bd2k_python_lib.egg-info/pbr.json
+src/bd2k_python_lib.egg-info/top_level.txt
\ No newline at end of file
diff --git a/src/bd2k_python_lib.egg-info/dependency_links.txt b/src/bd2k_python_lib.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/src/bd2k_python_lib.egg-info/namespace_packages.txt b/src/bd2k_python_lib.egg-info/namespace_packages.txt
new file mode 100644
index 0000000..a3cbc43
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/namespace_packages.txt
@@ -0,0 +1 @@
+bd2k
diff --git a/src/bd2k_python_lib.egg-info/pbr.json b/src/bd2k_python_lib.egg-info/pbr.json
new file mode 100644
index 0000000..e47388c
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/pbr.json
@@ -0,0 +1 @@
+{"is_release": false, "git_version": "a662f3c"}
\ No newline at end of file
diff --git a/src/bd2k_python_lib.egg-info/top_level.txt b/src/bd2k_python_lib.egg-info/top_level.txt
new file mode 100644
index 0000000..a3cbc43
--- /dev/null
+++ b/src/bd2k_python_lib.egg-info/top_level.txt
@@ -0,0 +1 @@
+bd2k
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-bd2k.git
More information about the debian-med-commit
mailing list