[Python-modules-commits] [partd] 01/04: Import partd_0.3.6.orig.tar.gz

Diane Trout diane at moszumanska.debian.org
Wed Dec 21 21:40:29 UTC 2016


This is an automated email from the git hooks/post-receive script.

diane pushed a commit to branch master
in repository partd.

commit 2b3bc901c489020f5da4580d3a2613d704f073f1
Author: Diane Trout <diane at ghic.org>
Date:   Wed Dec 21 10:49:13 2016 -0800

    Import partd_0.3.6.orig.tar.gz
---
 MANIFEST.in                         |   6 +
 PKG-INFO                            | 139 ++++++++++++++
 README.rst                          | 129 +++++++++++++
 partd.egg-info/PKG-INFO             | 139 ++++++++++++++
 partd.egg-info/SOURCES.txt          |  36 ++++
 partd.egg-info/dependency_links.txt |   1 +
 partd.egg-info/not-zip-safe         |   1 +
 partd.egg-info/requires.txt         |   8 +
 partd.egg-info/top_level.txt        |   1 +
 partd/__init__.py                   |  19 ++
 partd/buffer.py                     | 124 +++++++++++++
 partd/compatibility.py              |  14 ++
 partd/compressed.py                 |  45 +++++
 partd/core.py                       |  92 ++++++++++
 partd/dict.py                       |  66 +++++++
 partd/encode.py                     |  47 +++++
 partd/file.py                       | 141 +++++++++++++++
 partd/numpy.py                      | 168 +++++++++++++++++
 partd/pandas.py                     | 162 +++++++++++++++++
 partd/pickle.py                     |  17 ++
 partd/python.py                     |  40 +++++
 partd/tests/test_buffer.py          |  54 ++++++
 partd/tests/test_compressed.py      |  31 ++++
 partd/tests/test_dict.py            |  41 +++++
 partd/tests/test_encode.py          |  28 +++
 partd/tests/test_file.py            |  75 ++++++++
 partd/tests/test_numpy.py           |  67 +++++++
 partd/tests/test_pandas.py          |  82 +++++++++
 partd/tests/test_partd.py           |  48 +++++
 partd/tests/test_pickle.py          |  29 +++
 partd/tests/test_python.py          |  14 ++
 partd/tests/test_utils.py           |  11 ++
 partd/tests/test_zmq.py             | 127 +++++++++++++
 partd/utils.py                      | 177 ++++++++++++++++++
 partd/zmq.py                        | 350 ++++++++++++++++++++++++++++++++++++
 requirements.txt                    |   2 +
 setup.cfg                           |   5 +
 setup.py                            |  19 ++
 38 files changed, 2555 insertions(+)

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..e6971f4
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,6 @@
+recursive-include partd *.py
+
+include requirements.txt
+include setup.py
+include README.rst
+include MANIFEST.in
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..9aa4ae5
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,139 @@
+Metadata-Version: 1.0
+Name: partd
+Version: 0.3.6
+Summary: Appendable key-value storage
+Home-page: http://github.com/dask/partd/
+Author: Matthew Rocklin
+Author-email: mrocklin at gmail.com
+License: BSD
+Description: PartD
+        =====
+        
+        |Build Status| |Version Status|
+        
+        Key-value byte store with appendable values
+        
+            Partd stores key-value pairs.
+            Values are raw bytes.
+            We append on old values.
+        
+        Partd excels at shuffling operations.
+        
+        Operations
+        ----------
+        
+        PartD has two main operations, ``append`` and ``get``.
+        
+        
+        Example
+        -------
+        
+        1.  Create a Partd backed by a directory::
+        
+                >>> import partd
+                >>> p = partd.File('/path/to/new/dataset/')
+        
+        2.  Append key-byte pairs to dataset::
+        
+                >>> p.append({'x': b'Hello ', 'y': b'123'})
+                >>> p.append({'x': b'world!', 'y': b'456'})
+        
+        3.  Get bytes associated to keys::
+        
+                >>> p.get('x')         # One key
+                b'Hello world!'
+        
+                >>> p.get(['y', 'x'])  # List of keys
+                [b'123456', b'Hello world!']
+        
+        4.  Destroy partd dataset::
+        
+                >>> p.drop()
+        
+        That's it.
+        
+        
+        Implementations
+        ---------------
+        
+        We can back a partd by an in-memory dictionary::
+        
+            >>> p = Dict()
+        
+        For larger amounts of data or to share data between processes we back a partd
+        by a directory of files.  This uses file-based locks for consistency.::
+        
+            >>> p = File('/path/to/dataset/')
+        
+        However this can fail for many small writes.  In these cases you may wish to buffer one partd with another, keeping a fixed maximum of data in the buffering partd.  This writes the larger elements of the first partd to the second partd when space runs low::
+        
+            >>> p = Buffer(Dict(), File(), available_memory=2e9)  # 2GB memory buffer
+        
+        You might also want to have many distributed process write to a single partd
+        consistently.  This can be done with a server
+        
+        *   Server Process::
+        
+                >>> p = Buffer(Dict(), File(), available_memory=2e9)  # 2GB memory buffer
+                >>> s = Server(p, address='ipc://server')
+        
+        *   Worker processes::
+        
+                >>> p = Client('ipc://server')  # Client machine talks to remote server
+        
+        
+        Encodings and Compression
+        -------------------------
+        
+        Once we can robustly and efficiently append bytes to a partd we consider
+        compression and encodings.  This is generally available with the ``Encode``
+        partd, which accepts three functions, one to apply on bytes as they are
+        written, one to apply to bytes as they are read, and one to join bytestreams.
+        Common configurations already exist for common data and compression formats.
+        
+        We may wish to compress and decompress data transparently as we interact with a
+        partd.  Objects like ``BZ2``, ``Blosc``, ``ZLib`` and ``Snappy`` exist and take
+        another partd as an argument.::
+        
+            >>> p = File(...)
+            >>> p = ZLib(p)
+        
+        These work exactly as before, the (de)compression happens automatically.
+        
+        Common data formats like Python lists, numpy arrays, and pandas
+        dataframes are also supported out of the box.::
+        
+            >>> p = File(...)
+            >>> p = NumPy(p)
+            >>> p.append({'x': np.array([...])})
+        
+        This lets us forget about bytes and think instead in our normal data types.
+        
+        Composition
+        -----------
+        
+        In principle we want to compose all of these choices together
+        
+        1.  Write policy:  ``Dict``, ``File``, ``Buffer``, ``Client``
+        2.  Encoding:  ``Pickle``, ``Numpy``, ``Pandas``, ...
+        3.  Compression:  ``Blosc``, ``Snappy``, ...
+        
+        Partd objects compose by nesting.  Here we make a partd that writes pickle
+        encoded BZ2 compressed bytes directly to disk::
+        
+            >>> p = Pickle(BZ2(File('foo')))
+        
+        We could construct more complex systems that include compression,
+        serialization, buffering, and remote access.::
+        
+            >>> server = Server(Buffer(Dict(), File(), available_memory=2e0))
+        
+            >>> client = Pickle(Snappy(Client(server.address)))
+            >>> client.append({'x': [1, 2, 3]})
+        
+        .. |Build Status| image:: https://travis-ci.org/dask/partd.png
+           :target: https://travis-ci.org/dask/partd
+        .. |Version Status| image:: https://img.shields.io/pypi/v/partd.svg
+           :target: https://pypi.python.org/pypi/partd/
+        
+Platform: UNKNOWN
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..29015a4
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,129 @@
+PartD
+=====
+
+|Build Status| |Version Status|
+
+Key-value byte store with appendable values
+
+    Partd stores key-value pairs.
+    Values are raw bytes.
+    We append on old values.
+
+Partd excels at shuffling operations.
+
+Operations
+----------
+
+PartD has two main operations, ``append`` and ``get``.
+
+
+Example
+-------
+
+1.  Create a Partd backed by a directory::
+
+        >>> import partd
+        >>> p = partd.File('/path/to/new/dataset/')
+
+2.  Append key-byte pairs to dataset::
+
+        >>> p.append({'x': b'Hello ', 'y': b'123'})
+        >>> p.append({'x': b'world!', 'y': b'456'})
+
+3.  Get bytes associated with keys::
+
+        >>> p.get('x')         # One key
+        b'Hello world!'
+
+        >>> p.get(['y', 'x'])  # List of keys
+        [b'123456', b'Hello world!']
+
+4.  Destroy partd dataset::
+
+        >>> p.drop()
+
+That's it.
+
+
+Implementations
+---------------
+
+We can back a partd by an in-memory dictionary::
+
+    >>> p = Dict()
+
+For larger amounts of data or to share data between processes we back a partd
+by a directory of files.  This uses file-based locks for consistency::
+
+    >>> p = File('/path/to/dataset/')
+
+However this can fail for many small writes.  In these cases you may wish to buffer one partd with another, keeping a fixed maximum of data in the buffering partd.  This writes the larger elements of the first partd to the second partd when space runs low::
+
+    >>> p = Buffer(Dict(), File(), available_memory=2e9)  # 2GB memory buffer
+
+You might also want to have many distributed processes write to a single partd
+consistently.  This can be done with a server:
+
+*   Server Process::
+
+        >>> p = Buffer(Dict(), File(), available_memory=2e9)  # 2GB memory buffer
+        >>> s = Server(p, address='ipc://server')
+
+*   Worker processes::
+
+        >>> p = Client('ipc://server')  # Client machine talks to remote server
+
+
+Encodings and Compression
+-------------------------
+
+Once we can robustly and efficiently append bytes to a partd we consider
+compression and encodings.  This is generally available with the ``Encode``
+partd, which accepts three functions, one to apply on bytes as they are
+written, one to apply to bytes as they are read, and one to join bytestreams.
+Common configurations already exist for common data and compression formats.
+
+We may wish to compress and decompress data transparently as we interact with a
+partd.  Objects like ``BZ2``, ``Blosc``, ``ZLib`` and ``Snappy`` exist and take
+another partd as an argument::
+
+    >>> p = File(...)
+    >>> p = ZLib(p)
+
+These work exactly as before; the (de)compression happens automatically.
+
+Common data formats like Python lists, numpy arrays, and pandas
+dataframes are also supported out of the box::
+
+    >>> p = File(...)
+    >>> p = Numpy(p)
+    >>> p.append({'x': np.array([...])})
+
+This lets us forget about bytes and think instead in our normal data types.
+
+Composition
+-----------
+
+In principle we want to compose all of these choices together:
+
+1.  Write policy:  ``Dict``, ``File``, ``Buffer``, ``Client``
+2.  Encoding:  ``Pickle``, ``Numpy``, ``Pandas``, ...
+3.  Compression:  ``Blosc``, ``Snappy``, ...
+
+Partd objects compose by nesting.  Here we make a partd that writes pickle
+encoded BZ2 compressed bytes directly to disk::
+
+    >>> p = Pickle(BZ2(File('foo')))
+
+We could construct more complex systems that include compression,
+serialization, buffering, and remote access::
+
+    >>> server = Server(Buffer(Dict(), File(), available_memory=2e0))
+
+    >>> client = Pickle(Snappy(Client(server.address)))
+    >>> client.append({'x': [1, 2, 3]})
+
+.. |Build Status| image:: https://travis-ci.org/dask/partd.png
+   :target: https://travis-ci.org/dask/partd
+.. |Version Status| image:: https://img.shields.io/pypi/v/partd.svg
+   :target: https://pypi.python.org/pypi/partd/
diff --git a/partd.egg-info/PKG-INFO b/partd.egg-info/PKG-INFO
new file mode 100644
index 0000000..9aa4ae5
--- /dev/null
+++ b/partd.egg-info/PKG-INFO
@@ -0,0 +1,139 @@
+Metadata-Version: 1.0
+Name: partd
+Version: 0.3.6
+Summary: Appendable key-value storage
+Home-page: http://github.com/dask/partd/
+Author: Matthew Rocklin
+Author-email: mrocklin at gmail.com
+License: BSD
+Description: PartD
+        =====
+        
+        |Build Status| |Version Status|
+        
+        Key-value byte store with appendable values
+        
+            Partd stores key-value pairs.
+            Values are raw bytes.
+            We append on old values.
+        
+        Partd excels at shuffling operations.
+        
+        Operations
+        ----------
+        
+        PartD has two main operations, ``append`` and ``get``.
+        
+        
+        Example
+        -------
+        
+        1.  Create a Partd backed by a directory::
+        
+                >>> import partd
+                >>> p = partd.File('/path/to/new/dataset/')
+        
+        2.  Append key-byte pairs to dataset::
+        
+                >>> p.append({'x': b'Hello ', 'y': b'123'})
+                >>> p.append({'x': b'world!', 'y': b'456'})
+        
+        3.  Get bytes associated to keys::
+        
+                >>> p.get('x')         # One key
+                b'Hello world!'
+        
+                >>> p.get(['y', 'x'])  # List of keys
+                [b'123456', b'Hello world!']
+        
+        4.  Destroy partd dataset::
+        
+                >>> p.drop()
+        
+        That's it.
+        
+        
+        Implementations
+        ---------------
+        
+        We can back a partd by an in-memory dictionary::
+        
+            >>> p = Dict()
+        
+        For larger amounts of data or to share data between processes we back a partd
+        by a directory of files.  This uses file-based locks for consistency.::
+        
+            >>> p = File('/path/to/dataset/')
+        
+        However this can fail for many small writes.  In these cases you may wish to buffer one partd with another, keeping a fixed maximum of data in the buffering partd.  This writes the larger elements of the first partd to the second partd when space runs low::
+        
+            >>> p = Buffer(Dict(), File(), available_memory=2e9)  # 2GB memory buffer
+        
+        You might also want to have many distributed process write to a single partd
+        consistently.  This can be done with a server
+        
+        *   Server Process::
+        
+                >>> p = Buffer(Dict(), File(), available_memory=2e9)  # 2GB memory buffer
+                >>> s = Server(p, address='ipc://server')
+        
+        *   Worker processes::
+        
+                >>> p = Client('ipc://server')  # Client machine talks to remote server
+        
+        
+        Encodings and Compression
+        -------------------------
+        
+        Once we can robustly and efficiently append bytes to a partd we consider
+        compression and encodings.  This is generally available with the ``Encode``
+        partd, which accepts three functions, one to apply on bytes as they are
+        written, one to apply to bytes as they are read, and one to join bytestreams.
+        Common configurations already exist for common data and compression formats.
+        
+        We may wish to compress and decompress data transparently as we interact with a
+        partd.  Objects like ``BZ2``, ``Blosc``, ``ZLib`` and ``Snappy`` exist and take
+        another partd as an argument.::
+        
+            >>> p = File(...)
+            >>> p = ZLib(p)
+        
+        These work exactly as before, the (de)compression happens automatically.
+        
+        Common data formats like Python lists, numpy arrays, and pandas
+        dataframes are also supported out of the box.::
+        
+            >>> p = File(...)
+            >>> p = NumPy(p)
+            >>> p.append({'x': np.array([...])})
+        
+        This lets us forget about bytes and think instead in our normal data types.
+        
+        Composition
+        -----------
+        
+        In principle we want to compose all of these choices together
+        
+        1.  Write policy:  ``Dict``, ``File``, ``Buffer``, ``Client``
+        2.  Encoding:  ``Pickle``, ``Numpy``, ``Pandas``, ...
+        3.  Compression:  ``Blosc``, ``Snappy``, ...
+        
+        Partd objects compose by nesting.  Here we make a partd that writes pickle
+        encoded BZ2 compressed bytes directly to disk::
+        
+            >>> p = Pickle(BZ2(File('foo')))
+        
+        We could construct more complex systems that include compression,
+        serialization, buffering, and remote access.::
+        
+            >>> server = Server(Buffer(Dict(), File(), available_memory=2e0))
+        
+            >>> client = Pickle(Snappy(Client(server.address)))
+            >>> client.append({'x': [1, 2, 3]})
+        
+        .. |Build Status| image:: https://travis-ci.org/dask/partd.png
+           :target: https://travis-ci.org/dask/partd
+        .. |Version Status| image:: https://img.shields.io/pypi/v/partd.svg
+           :target: https://pypi.python.org/pypi/partd/
+        
+Platform: UNKNOWN
diff --git a/partd.egg-info/SOURCES.txt b/partd.egg-info/SOURCES.txt
new file mode 100644
index 0000000..42674af
--- /dev/null
+++ b/partd.egg-info/SOURCES.txt
@@ -0,0 +1,36 @@
+MANIFEST.in
+README.rst
+requirements.txt
+setup.py
+partd/__init__.py
+partd/buffer.py
+partd/compatibility.py
+partd/compressed.py
+partd/core.py
+partd/dict.py
+partd/encode.py
+partd/file.py
+partd/numpy.py
+partd/pandas.py
+partd/pickle.py
+partd/python.py
+partd/utils.py
+partd/zmq.py
+partd.egg-info/PKG-INFO
+partd.egg-info/SOURCES.txt
+partd.egg-info/dependency_links.txt
+partd.egg-info/not-zip-safe
+partd.egg-info/requires.txt
+partd.egg-info/top_level.txt
+partd/tests/test_buffer.py
+partd/tests/test_compressed.py
+partd/tests/test_dict.py
+partd/tests/test_encode.py
+partd/tests/test_file.py
+partd/tests/test_numpy.py
+partd/tests/test_pandas.py
+partd/tests/test_partd.py
+partd/tests/test_pickle.py
+partd/tests/test_python.py
+partd/tests/test_utils.py
+partd/tests/test_zmq.py
\ No newline at end of file
diff --git a/partd.egg-info/dependency_links.txt b/partd.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/partd.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/partd.egg-info/not-zip-safe b/partd.egg-info/not-zip-safe
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/partd.egg-info/not-zip-safe
@@ -0,0 +1 @@
+
diff --git a/partd.egg-info/requires.txt b/partd.egg-info/requires.txt
new file mode 100644
index 0000000..dc90b38
--- /dev/null
+++ b/partd.egg-info/requires.txt
@@ -0,0 +1,8 @@
+locket
+toolz
+
+[complete]
+numpy >= 1.9.0
+pandas
+pyzmq
+blosc
diff --git a/partd.egg-info/top_level.txt b/partd.egg-info/top_level.txt
new file mode 100644
index 0000000..dfcaaf4
--- /dev/null
+++ b/partd.egg-info/top_level.txt
@@ -0,0 +1 @@
+partd
diff --git a/partd/__init__.py b/partd/__init__.py
new file mode 100644
index 0000000..c14f6d7
--- /dev/null
+++ b/partd/__init__.py
@@ -0,0 +1,19 @@
+from __future__ import absolute_import
+
+from .file import File
+from .dict import Dict
+from .buffer import Buffer
+from .encode import Encode
+from .pickle import Pickle
+from .python import Python
+from .compressed import *
+from .utils import ignoring
+with ignoring(ImportError):
+    from .numpy import Numpy
+with ignoring(ImportError):
+    from .pandas import PandasColumns, PandasBlocks
+with ignoring(ImportError):
+    from .zmq import Client, Server
+
+
+__version__ = '0.3.6'
diff --git a/partd/buffer.py b/partd/buffer.py
new file mode 100644
index 0000000..b4ef65f
--- /dev/null
+++ b/partd/buffer.py
@@ -0,0 +1,124 @@
+from .core import Interface
+from threading import Lock
+from toolz import merge_with, topk, accumulate, pluck
+from operator import add
+from bisect import bisect
+from collections import defaultdict
+from .compatibility import Queue, Empty
+
+
+def zero():
+    return 0
+
+class Buffer(Interface):
+    def __init__(self, fast, slow, available_memory=1e9):
+        self.lock = Lock()
+        self.fast = fast
+        self.slow = slow
+        self.available_memory = available_memory
+        self.lengths = defaultdict(zero)
+        self.memory_usage = 0
+        Interface.__init__(self)
+
+    def __getstate__(self):
+        return {'fast': self.fast,
+                'slow': self.slow,
+                'memory_usage': self.memory_usage,
+                'lengths': self.lengths,
+                'available_memory': self.available_memory}
+
+    def __setstate__(self, state):
+        Interface.__setstate__(self, state)
+        self.lock = Lock()
+        self.__dict__.update(state)
+
+    def append(self, data, lock=True, **kwargs):
+        if lock: self.lock.acquire()
+        try:
+            for k, v in data.items():
+                self.lengths[k] += len(v)
+                self.memory_usage += len(v)
+            self.fast.append(data, lock=False, **kwargs)
+
+            while self.memory_usage > self.available_memory:
+                keys = keys_to_flush(self.lengths, 0.1, maxcount=20)
+                self.flush(keys)
+
+        finally:
+            if lock: self.lock.release()
+
+    def _get(self, keys, lock=True, **kwargs):
+        if lock: self.lock.acquire()
+        try:
+            result = list(map(add, self.fast.get(keys, lock=False),
+                                   self.slow.get(keys, lock=False)))
+        finally:
+            if lock: self.lock.release()
+        return result
+
+    def _iset(self, key, value, lock=True):
+        """ Idempotent set """
+        if lock: self.lock.acquire()
+        try:
+            self.fast.iset(key, value, lock=False)
+        finally:
+            if lock: self.lock.release()
+
+    def _delete(self, keys, lock=True):
+        if lock: self.lock.acquire()
+        try:
+            self.fast.delete(keys, lock=False)
+            self.slow.delete(keys, lock=False)
+        finally:
+            if lock: self.lock.release()
+
+    def drop(self):
+        self._iset_seen.clear()
+        self.fast.drop()
+        self.slow.drop()
+
+    def __exit__(self, *args):
+        self.drop()
+
+    def flush(self, keys=None, block=None):
+        """ Flush keys to disk
+
+        Parameters
+        ----------
+
+        keys: list or None
+            list of keys to flush
+        block: bool (defaults to None)
+            Whether or not to block until all writing is complete
+
+        If no keys are given then flush all keys
+        """
+        if keys is None:
+            keys = list(self.lengths)
+
+        self.slow.append(dict(zip(keys, self.fast.get(keys))))
+        self.fast.delete(keys)
+
+        for key in keys:
+            self.memory_usage -= self.lengths[key]
+            del self.lengths[key]
+
+
+def keys_to_flush(lengths, fraction=0.1, maxcount=100000):
+    """ Which keys to remove
+
+    >>> lengths = {'a': 20, 'b': 10, 'c': 15, 'd': 15,
+    ...            'e': 10, 'f': 25, 'g': 5}
+    >>> keys_to_flush(lengths, 0.5)
+    ['f', 'a']
+    """
+    top = topk(max(len(lengths) // 2, 1),
+               lengths.items(),
+               key=1)
+    total = sum(lengths.values())
+    cutoff = min(maxcount, max(1,
+                   bisect(list(accumulate(add, pluck(1, top))),
+                          total * fraction)))
+    result = [k for k, v in top[:cutoff]]
+    assert result
+    return result
diff --git a/partd/compatibility.py b/partd/compatibility.py
new file mode 100644
index 0000000..20a217a
--- /dev/null
+++ b/partd/compatibility.py
@@ -0,0 +1,14 @@
+from __future__ import absolute_import
+
+import sys
+
+if sys.version_info[0] == 3:
+    from io import StringIO
+    unicode = str
+    import pickle
+    from queue import Queue, Empty
+if sys.version_info[0] == 2:
+    from StringIO import StringIO
+    unicode = unicode
+    import cPickle as pickle
+    from Queue import Queue, Empty
diff --git a/partd/compressed.py b/partd/compressed.py
new file mode 100644
index 0000000..a73f77b
--- /dev/null
+++ b/partd/compressed.py
@@ -0,0 +1,45 @@
+from .utils import ignoring
+from .encode import Encode
+from functools import partial
+
+__all__ = []
+
+
+def bytes_concat(L):
+    return b''.join(L)
+
+
+with ignoring(ImportError):
+    import snappy
+    Snappy = partial(Encode,
+                     snappy.compress,
+                     snappy.decompress,
+                     bytes_concat)
+    __all__.append('Snappy')
+
+
+with ignoring(ImportError):
+    import zlib
+    ZLib = partial(Encode,
+                   zlib.compress,
+                   zlib.decompress,
+                   bytes_concat)
+    __all__.append('ZLib')
+
+
+with ignoring(ImportError):
+    import bz2
+    BZ2 = partial(Encode,
+                  bz2.compress,
+                  bz2.decompress,
+                  bytes_concat)
+    __all__.append('BZ2')
+
+
+with ignoring(ImportError):
+    import blosc
+    Blosc = partial(Encode,
+                    blosc.compress,
+                    blosc.decompress,
+                    bytes_concat)
+    __all__.append('Blosc')
diff --git a/partd/core.py b/partd/core.py
new file mode 100644
index 0000000..a3f9376
--- /dev/null
+++ b/partd/core.py
@@ -0,0 +1,92 @@
+from __future__ import absolute_import
+
+import os
+import shutil
+import locket
+import string
+from toolz import memoize
+from contextlib import contextmanager
+from .utils import nested_get, flatten
+
+
+
+# http://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename-in-python
+valid_chars = "-_.() " + string.ascii_letters + string.digits + os.path.sep
+
+
+def escape_filename(fn):
+    """ Escape text so that it is a valid filename
+
+    >>> escape_filename('Foo!bar?')
+    'Foobar'
+
+    """
+    return ''.join(filter(valid_chars.__contains__, fn))
+
+
+def filename(path, key):
+    return os.path.join(path, escape_filename(token(key)))
+
+
+def token(key):
+    """
+
+    >>> token('hello')
+    'hello'
+    >>> token(('hello', 'world'))  # doctest: +SKIP
+    'hello/world'
+    """
+    if isinstance(key, str):
+        return key
+    elif isinstance(key, tuple):
+        return os.path.join(*map(token, key))
+    else:
+        return str(key)
+
+
+class Interface(object):
+    def __init__(self):
+        self._iset_seen = set()
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        self._iset_seen = set()
+
+    def iset(self, key, value, **kwargs):
+        if key in self._iset_seen:
+            return
+        else:
+            self._iset(key, value, **kwargs)
+            self._iset_seen.add(key)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        self.drop()
+
+    def iget(self, key):
+        return self._get([key], lock=False)[0]
+
+    def get(self, keys, **kwargs):
+        if not isinstance(keys, list):
+            return self.get([keys], **kwargs)[0]
+        elif any(isinstance(key, list) for key in keys):  # nested case
+            flatkeys = list(flatten(keys))
+            result = self.get(flatkeys, **kwargs)
+            return nested_get(keys, dict(zip(flatkeys, result)))
+        else:
+            return self._get(keys, **kwargs)
+
+    def delete(self, keys, **kwargs):
+        if not isinstance(keys, list):
+            return self._delete([keys], **kwargs)
+        else:
+            return self._delete(keys, **kwargs)
+
+    def pop(self, keys, **kwargs):
+        with self.partd.lock:
+            result = self.partd.get(keys, lock=False)
+            self.partd.delete(keys, lock=False)
+        return result
+
diff --git a/partd/dict.py b/partd/dict.py
new file mode 100644
index 0000000..de7e427
--- /dev/null
+++ b/partd/dict.py
@@ -0,0 +1,66 @@
+from .core import Interface
+from threading import Lock
+
+
+class Dict(Interface):
+    def __init__(self):
+        self.lock = Lock()
+        self.data = dict()
+        Interface.__init__(self)
+
+    def __getstate__(self):
+        return {'data': self.data}
+
+    def __setstate__(self, state):
+        Interface.__setstate__(self, state)
+        Dict.__init__(self)
+        self.data = state['data']
+
+    def append(self, data, lock=True, **kwargs):
+        if lock: self.lock.acquire()
+        try:
+            for k, v in data.items():
+                if k not in self.data:
+                    self.data[k] = []
+                self.data[k].append(v)
+        finally:
+            if lock: self.lock.release()
+
+    def _get(self, keys, lock=True, **kwargs):
+        assert isinstance(keys, (list, tuple, set))
+        if lock:
+            self.lock.acquire()
+        try:
+            result = [b''.join(self.data.get(key, [])) for key in keys]
+        finally:
+            if lock:
+                self.lock.release()
+        return result
+
+    def _iset(self, key, value, lock=True):
+        """ Idempotent set """
+        if lock:
+            self.lock.acquire()
+        try:
+            self.data[key] = [value]
+        finally:
+            if lock:
+                self.lock.release()
+
+    def _delete(self, keys, lock=True):
+        if lock:
+            self.lock.acquire()
+        try:
+            for key in keys:
+                if key in self.data:
+                    del self.data[key]
+        finally:
+            if lock:
+                self.lock.release()
+
+    def drop(self):
+        self._iset_seen.clear()
+        self.data.clear()
+
+    def __exit__(self, *args):
+        self.drop()
diff --git a/partd/encode.py b/partd/encode.py
new file mode 100644
index 0000000..3e3a15f
--- /dev/null
+++ b/partd/encode.py
@@ -0,0 +1,47 @@
+from .core import Interface
+from .file import File
+from toolz import valmap
+from .utils import frame, framesplit
+
+
+class Encode(Interface):
+    def __init__(self, encode, decode, join, partd=None):
+        if not partd or isinstance(partd, str):
+            partd = File(partd)
+        self.partd = partd
+        self.encode = encode
+        self.decode = decode
+        self.join = join
+        Interface.__init__(self)
+
+    def __getstate__(self):
+        return self.__dict__
+
+    __setstate__ = Interface.__setstate__
+
+    def append(self, data, **kwargs):
+        data = valmap(self.encode, data)
+        data = valmap(frame, data)
+        self.partd.append(data, **kwargs)
+
+    def _get(self, keys, **kwargs):
+        raw = self.partd._get(keys, **kwargs)
+        return [self.join([self.decode(frame) for frame in framesplit(chunk)])
+                for chunk in raw]
+
+    def delete(self, keys, **kwargs):
+        return self.partd.delete(keys, **kwargs)
+
+    def _iset(self, key, value, **kwargs):
+        return self.partd.iset(key, frame(self.encode(value)), **kwargs)
+
... 1830 lines suppressed ...

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/partd.git



More information about the Python-modules-commits mailing list