[Python-modules-commits] [dask] 02/08: New upstream version 0.15.1

Diane Trout diane at moszumanska.debian.org
Wed Jul 26 22:02:03 UTC 2017


This is an automated email from the git hooks/post-receive script.

diane pushed a commit to branch master
in repository dask.

commit f5cbe3f67f4a1a20e0f3979a738d809300acf0c1
Author: Diane Trout <diane at ghic.org>
Date:   Thu Jul 20 12:04:48 2017 -0700

    New upstream version 0.15.1
---
 .travis.yml                                        |  10 +-
 MANIFEST.in                                        |   4 +-
 continuous_integration/travis/install.sh           |  26 +-
 continuous_integration/travis/run_tests.sh         |   2 +
 dask/__init__.py                                   |   8 +-
 dask/array/__init__.py                             |  18 +-
 dask/array/core.py                                 | 492 +++++++++-------
 dask/array/fft.py                                  |  67 +++
 dask/array/ghost.py                                |   4 +-
 dask/array/optimization.py                         | 121 ++--
 dask/array/percentile.py                           |   4 -
 dask/array/random.py                               |   2 +-
 dask/array/reductions.py                           | 149 +++--
 dask/array/slicing.py                              | 206 ++++---
 dask/array/stats.py                                | 432 ++++++++++++++
 dask/array/tests/test_array_core.py                | 224 +++++--
 dask/array/tests/test_fft.py                       |  79 +++
 dask/array/tests/test_ghost.py                     |   6 +-
 dask/array/tests/test_learn.py                     |   5 +-
 dask/array/tests/test_linalg.py                    |  56 +-
 dask/array/tests/test_optimization.py              | 154 +++--
 dask/array/tests/test_percentiles.py               |   3 +-
 dask/array/tests/test_reductions.py                |  71 ++-
 dask/array/tests/test_slicing.py                   |  75 ++-
 dask/array/tests/test_sparse.py                    |  16 +-
 dask/array/tests/test_stats.py                     | 135 +++++
 dask/array/tests/test_ufunc.py                     | 220 +++++--
 dask/array/ufunc.py                                | 232 +++++---
 dask/array/utils.py                                |   2 +-
 dask/array/wrap.py                                 |  12 -
 dask/async.py                                      | 648 +--------------------
 dask/bag/__init__.py                               |   3 +-
 dask/bag/core.py                                   | 625 +++++++++++++++-----
 dask/bag/tests/test_bag.py                         | 248 ++++++--
 dask/bag/text.py                                   |   2 +-
 dask/base.py                                       |  34 +-
 dask/bytes/core.py                                 |  48 +-
 dask/bytes/local.py                                |   8 +-
 dask/bytes/s3.py                                   |   6 +-
 dask/bytes/tests/test_bytes_utils.py               |  62 +-
 dask/bytes/tests/test_local.py                     |  31 +
 dask/bytes/utils.py                                |  53 +-
 dask/callbacks.py                                  |  18 +
 dask/compatibility.py                              |  15 +
 dask/dataframe/core.py                             |  85 +--
 dask/dataframe/groupby.py                          |  18 +-
 dask/dataframe/io/csv.py                           |  63 +-
 dask/dataframe/io/demo.py                          |   6 +-
 dask/dataframe/io/hdf.py                           |  14 +-
 dask/dataframe/io/io.py                            |  13 +-
 dask/dataframe/io/parquet.py                       | 118 ++--
 dask/dataframe/io/tests/test_csv.py                |  48 +-
 dask/dataframe/io/tests/test_demo.py               |   2 +
 dask/dataframe/io/tests/test_hdf.py                |   6 +-
 dask/dataframe/io/tests/test_io.py                 |  15 +-
 dask/dataframe/io/tests/test_parquet.py            |  87 ++-
 dask/dataframe/methods.py                          |  14 +-
 dask/dataframe/multi.py                            |   2 +
 dask/dataframe/shuffle.py                          |  23 +
 dask/dataframe/tests/test_arithmetics_reduction.py |  12 +-
 dask/dataframe/tests/test_categorical.py           |   6 +-
 dask/dataframe/tests/test_dataframe.py             | 106 +++-
 dask/dataframe/tests/test_groupby.py               | 113 ++--
 dask/dataframe/tests/test_hyperloglog.py           |   4 +-
 dask/dataframe/tests/test_indexing.py              |  14 +
 dask/dataframe/tests/test_multi.py                 |  51 +-
 dask/dataframe/tests/test_rolling.py               |  47 +-
 dask/dataframe/tests/test_shuffle.py               |  31 +-
 dask/dataframe/tests/test_ufunc.py                 | 128 ++--
 dask/dataframe/tests/test_utils_dataframe.py       |  12 +-
 dask/dataframe/utils.py                            |  54 +-
 dask/delayed.py                                    |  48 +-
 dask/diagnostics/tests/test_progress.py            |   2 +-
 dask/distributed.py                                |   4 +-
 dask/hashing.py                                    | 102 ++++
 dask/{async.py => local.py}                        | 251 +++-----
 dask/multiprocessing.py                            |  96 ++-
 dask/tests/test_base.py                            |  19 +-
 dask/tests/test_cache.py                           |   2 +-
 dask/tests/test_callbacks.py                       |  35 +-
 dask/tests/test_delayed.py                         |  16 +-
 dask/tests/test_distributed.py                     |   4 +-
 dask/tests/test_hashing.py                         |  45 ++
 dask/tests/{test_async.py => test_local.py}        |  20 +-
 dask/tests/test_multiprocessing.py                 |  14 +-
 dask/tests/test_sizeof.py                          |   9 +-
 dask/tests/test_threaded.py                        |   5 +-
 dask/tests/test_utils.py                           | 197 +++----
 dask/threaded.py                                   |  19 +-
 dask/utils.py                                      | 460 +++++----------
 docs/release-procedure.md                          |   9 +
 docs/requirements-docs.txt                         |   3 +-
 docs/source/array-api.rst                          |  40 ++
 docs/source/array-creation.rst                     |   2 +-
 docs/source/array-overview.rst                     |   4 +-
 docs/source/array-stats.rst                        |  36 ++
 docs/source/array.rst                              |   1 +
 docs/source/bag-api.rst                            |  13 +-
 docs/source/changelog.rst                          |  44 ++
 docs/source/conf.py                                |   2 +-
 docs/source/dataframe-api.rst                      |   1 -
 docs/source/dataframe-create.rst                   |   2 +-
 docs/source/dataframe-design.rst                   |   2 +-
 docs/source/debugging.rst                          |   2 +-
 docs/source/diagnostics.rst                        |   7 +-
 docs/source/examples-tutorials.rst                 |   2 +
 docs/source/examples/array-extend.rst              |   4 +-
 docs/source/faq.rst                                |  10 +-
 docs/source/futures.rst                            | 525 +++++++++++++++++
 docs/source/index.rst                              |  12 +-
 docs/source/install.rst                            |  18 +-
 docs/source/remote-data-services.rst               |   2 +-
 docs/source/scheduler-choice.rst                   |  27 +-
 docs/source/scheduler-overview.rst                 |   6 +-
 docs/source/scheduling-policy.rst                  |   7 +-
 docs/source/scripts/scheduling.py                  |   6 +-
 docs/source/shared.rst                             |   4 +-
 setup.py                                           |   4 +-
 118 files changed, 5081 insertions(+), 2790 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 911d6f7..ea6c0bc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,14 +1,14 @@
 language: generic
 sudo: false
-os: linux
+dist: trusty
 
 env:
   matrix:
-    - PYTHON=2.7 NUMPY=1.10.4 PANDAS=0.19.0 COVERAGE='true' PARALLEL='false' TEST_IMPORTS='false' XTRATESTARGS=
-    - PYTHON=2.7 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
+    - PYTHON=2.7 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='true' PARALLEL='false' TEST_IMPORTS='false' XTRATESTARGS=
+    - PYTHON=2.7 NUMPY=1.13.0 PANDAS=0.20.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
     - PYTHON=3.4 NUMPY=1.10.4 PANDAS=0.19.1 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
-    - PYTHON=3.5 NUMPY=1.11.3 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' XTRATESTARGS=
-    - PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
+    - PYTHON=3.5 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' XTRATESTARGS=
+    - PYTHON=3.6 NUMPY=1.13.0 PANDAS=0.20.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
 
 matrix:
   fast_finish: true
diff --git a/MANIFEST.in b/MANIFEST.in
index fbd1ed7..c0dbaa0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,11 +1,11 @@
 recursive-include dask *.py
-recursive-include docs *.rst
+recursive-include docs/source *
+include docs/Makefile docs/make.bat
 
 include setup.py
 include README.rst
 include LICENSE.txt
 include MANIFEST.in
 
-prune docs/_build
 include versioneer.py
 include dask/_version.py
diff --git a/continuous_integration/travis/install.sh b/continuous_integration/travis/install.sh
index 37aedcf..d3bb215 100644
--- a/continuous_integration/travis/install.sh
+++ b/continuous_integration/travis/install.sh
@@ -26,44 +26,38 @@ echo "numpy $NUMPY" >> $CONDA_PREFIX/conda-meta/pinned
 echo "pandas $PANDAS" >> $CONDA_PREFIX/conda-meta/pinned
 
 # Install dependencies.
-# XXX: Due to a weird conda dependency resolution issue, we need to install
-# dependencies in two separate calls, otherwise we sometimes get version
-# incompatible with the installed version of numpy leading to crashes. This
-# seems to have to do with differences between conda-forge and defaults.
 conda install -q -c conda-forge \
     numpy \
     pandas \
     bcolz \
     blosc \
     bokeh \
+    boto3 \
     chest \
+    cloudpickle \
     coverage \
     cytoolz \
+    distributed \
     graphviz \
     h5py \
     ipython \
     partd \
     psutil \
-    pyarrow \
     pytables \
-    pytest \
+    "pytest<=3.1.1" \
     scikit-image \
     scikit-learn \
     scipy \
     sqlalchemy \
     toolz
 
-# Specify numpy/pandas here to prevent upgrade/downgrade
-conda install -q -c conda-forge \
-    distributed \
-    cloudpickle \
-
 pip install -q git+https://github.com/dask/partd --upgrade --no-deps
 pip install -q git+https://github.com/dask/zict --upgrade --no-deps
 pip install -q git+https://github.com/dask/distributed --upgrade --no-deps
 pip install -q git+https://github.com/mrocklin/sparse --upgrade --no-deps
+pip install -q git+https://github.com/dask/s3fs --upgrade --no-deps
 
-if [[ $PYTHONOPTIMIZE != '2' ]]; then
+if [[ $PYTHONOPTIMIZE != '2' ]] && [[ $NUMPY > '1.11.0' ]] && [[ $NUMPY < '1.13.0' ]]; then
     conda install -q -c conda-forge numba cython
     pip install -q git+https://github.com/dask/fastparquet
 fi
@@ -76,12 +70,18 @@ pip install -q \
     cachey \
     graphviz \
     moto \
+    pyarrow \
     --upgrade --no-deps
 
 pip install -q \
+    cityhash \
     flake8 \
+    mmh3 \
     pandas_datareader \
-    pytest-xdist
+    pytest-xdist \
+    xxhash
 
 # Install dask
 pip install -q --no-deps -e .[complete]
+echo pip freeze
+pip freeze
diff --git a/continuous_integration/travis/run_tests.sh b/continuous_integration/travis/run_tests.sh
index 54bd62b..6c8c9c9 100644
--- a/continuous_integration/travis/run_tests.sh
+++ b/continuous_integration/travis/run_tests.sh
@@ -6,7 +6,9 @@ if [[ $PARALLEL == 'true' ]]; then
 fi
 
 if [[ $COVERAGE == 'true' ]]; then
+    echo "coverage run `which py.test` dask --runslow --doctest-modules --verbose $XTRATESTARGS"
     coverage run `which py.test` dask --runslow --doctest-modules --verbose $XTRATESTARGS
 else
+    echo "py.test dask --runslow --verbose $XTRATESTARGS"
     py.test dask --runslow --verbose $XTRATESTARGS
 fi
diff --git a/dask/__init__.py b/dask/__init__.py
index 30a3045..d81af59 100644
--- a/dask/__init__.py
+++ b/dask/__init__.py
@@ -2,9 +2,9 @@ from __future__ import absolute_import, division, print_function
 
 from .core import istask
 from .context import set_options
-from .async import get_sync as get
+from .local import get_sync as get
 try:
-    from .delayed import do, delayed
+    from .delayed import delayed
 except ImportError:
     pass
 try:
@@ -12,6 +12,10 @@ try:
 except ImportError:
     pass
 
+# dask.async is deprecated. For now we import it into the top namespace to be
+# compatible with prior releases. This should be removed in a future release:
+import dask.async
+
 from ._version import get_versions
 versions = get_versions()
 __version__ = versions['version']
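
As a quick orientation for the rename above: the synchronous scheduler that
used to live in ``dask.async`` now lives in ``dask.local``, and ``dask.get``
continues to point at it. A minimal sketch, assuming the 0.15.1 layout shown
in this hunk:

    import dask
    from dask.local import get_sync  # formerly dask.async.get_sync

    # A raw dask graph: keys mapped to literal values or task tuples
    dsk = {'x': 1, 'y': (lambda v: v + 1, 'x')}
    assert get_sync(dsk, 'y') == 2
    assert dask.get is get_sync  # the default get is the synchronous one
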
diff --git a/dask/array/__init__.py b/dask/array/__init__.py
index 22270ac..483dbd3 100644
--- a/dask/array/__init__.py
+++ b/dask/array/__init__.py
@@ -6,17 +6,21 @@ from .core import (Array, stack, concatenate, take, tensordot, transpose,
         roll, fromfunction, unique, store, squeeze, topk, bincount, tile,
         digitize, histogram, map_blocks, atop, to_hdf5, dot, cov, array,
         dstack, vstack, hstack, to_npy_stack, from_npy_stack, compress,
-        from_delayed, round, swapaxes, repeat, asarray)
+        from_delayed, round, swapaxes, repeat, asarray, asanyarray)
 from .core import (around, isnull, notnull, isclose, eye, triu,
                    tril, diag, corrcoef)
 from .reshape import reshape
-from .ufunc import (logaddexp, logaddexp2, conj, exp, log, log2, log10, log1p,
-        expm1, sqrt, square, sin, cos, tan, arcsin, arccos, arctan, arctan2,
-        hypot, sinh, cosh, tanh, arcsinh, arccosh, arctanh, deg2rad, rad2deg,
-        logical_and, logical_or, logical_xor, logical_not, maximum, minimum,
+from .ufunc import (add, subtract, multiply, divide, logaddexp, logaddexp2,
+        true_divide, floor_divide, negative, power, remainder, mod, conj, exp,
+        exp2, log, log2, log10, log1p, expm1, sqrt, square, cbrt, reciprocal,
+        sin, cos, tan, arcsin, arccos, arctan, arctan2, hypot, sinh, cosh,
+        tanh, arcsinh, arccosh, arctanh, deg2rad, rad2deg, greater,
+        greater_equal, less, less_equal, not_equal, equal, logical_and,
+        logical_or, logical_xor, logical_not, maximum, minimum,
         fmax, fmin, isreal, iscomplex, isfinite, isinf, isnan, signbit,
-        copysign, nextafter, ldexp, fmod, floor, ceil, trunc, degrees, radians,
-        rint, fix, angle, real, imag, clip, fabs, sign, absolute, frexp, modf)
+        copysign, nextafter, spacing, ldexp, fmod, floor, ceil, trunc, degrees,
+        radians, rint, fix, angle, real, imag, clip, fabs, sign, absolute,
+        i0, sinc, nan_to_num, frexp, modf)
 from .reductions import (sum, prod, mean, std, var, any, all, min, max, vnorm,
                          moment,
                          argmin, argmax,
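
The widened import list above makes the arithmetic and comparison ufuncs
available directly under ``dask.array``. A short sketch of the expanded
namespace, assuming dask 0.15.1:

    import numpy as np
    import dask.array as da

    x = da.arange(10, chunks=5)
    y = da.add(x, 1)            # arithmetic ufuncs are now top-level
    m = da.greater_equal(x, 5)  # as are the comparison ufuncs
    assert (y.compute() == np.arange(10) + 1).all()
    assert int(m.sum().compute()) == 5
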
diff --git a/dask/array/core.py b/dask/array/core.py
index d9d47f4..d7d5ac3 100644
--- a/dask/array/core.py
+++ b/dask/array/core.py
@@ -1,11 +1,12 @@
 from __future__ import absolute_import, division, print_function
 
 from bisect import bisect
-from collections import Iterable, MutableMapping, Mapping
+from collections import Iterable, Mapping
 from collections import Iterator
 from functools import partial, wraps
 import inspect
 from itertools import product
+import math
 from numbers import Integral, Number
 import operator
 from operator import add, getitem, mul
@@ -44,23 +45,18 @@ from .. import sharedict
 from ..sharedict import ShareDict
 
 
-def getarray(a, b, lock=None):
-    """ Mimics getitem but includes call to np.asarray
-
-    >>> getarray([1, 2, 3, 4, 5], slice(1, 4))
-    array([2, 3, 4])
-    """
+def getter(a, b, asarray=True, lock=None):
     if isinstance(b, tuple) and any(x is None for x in b):
         b2 = tuple(x for x in b if x is not None)
         b3 = tuple(None if x is None else slice(None, None)
                    for x in b if not isinstance(x, (int, long)))
-        return getarray(a, b2, lock)[b3]
+        return getter(a, b2, asarray=asarray, lock=lock)[b3]
 
     if lock:
         lock.acquire()
     try:
         c = a[b]
-        if type(c) != np.ndarray:
+        if asarray:
             c = np.asarray(c)
     finally:
         if lock:
@@ -68,17 +64,35 @@ def getarray(a, b, lock=None):
     return c
 
 
-def getarray_nofancy(a, b, lock=None):
-    """ A simple wrapper around ``getarray``.
+def getter_nofancy(a, b, asarray=True, lock=None):
+    """ A simple wrapper around ``getter``.
 
     Used to indicate to the optimization passes that the backend doesn't
-    support "fancy indexing"
+    support fancy indexing.
     """
-    return getarray(a, b, lock=lock)
+    return getter(a, b, asarray=asarray, lock=lock)
+
+
+def getter_inline(a, b, asarray=True, lock=None):
+    """ A getter function that optimizations feel comfortable inlining
+
+    Slicing operations with this function may be inlined into a graph, such as
+    in the following rewrite
 
+    **Before**
 
-def getarray_inline(a, b, lock=None):
-    return getarray(a, b, lock=lock)
+    >>> a = x[:10]  # doctest: +SKIP
+    >>> b = a + 1  # doctest: +SKIP
+    >>> c = a * 2  # doctest: +SKIP
+
+    **After**
+
+    >>> b = x[:10] + 1  # doctest: +SKIP
+    >>> c = x[:10] * 2  # doctest: +SKIP
+
+    This inlining can be beneficial for operations that run off of disk.
+    """
+    return getter(a, b, asarray=asarray, lock=lock)
 
 
 from .optimization import optimize, fuse_slice
@@ -102,20 +116,21 @@ def slices_from_chunks(chunks):
             for start, shape in zip(starts, shapes)]
 
 
-def getem(arr, chunks, getitem=getarray, shape=None, out_name=None, lock=False):
+def getem(arr, chunks, getitem=getter, shape=None, out_name=None, lock=False,
+          asarray=True):
     """ Dask getting various chunks from an array-like
 
     >>> getem('X', chunks=(2, 3), shape=(4, 6))  # doctest: +SKIP
-    {('X', 0, 0): (getarray, 'X', (slice(0, 2), slice(0, 3))),
-     ('X', 1, 0): (getarray, 'X', (slice(2, 4), slice(0, 3))),
-     ('X', 1, 1): (getarray, 'X', (slice(2, 4), slice(3, 6))),
-     ('X', 0, 1): (getarray, 'X', (slice(0, 2), slice(3, 6)))}
+    {('X', 0, 0): (getter, 'X', (slice(0, 2), slice(0, 3))),
+     ('X', 1, 0): (getter, 'X', (slice(2, 4), slice(0, 3))),
+     ('X', 1, 1): (getter, 'X', (slice(2, 4), slice(3, 6))),
+     ('X', 0, 1): (getter, 'X', (slice(0, 2), slice(3, 6)))}
 
     >>> getem('X', chunks=((2, 2), (3, 3)))  # doctest: +SKIP
-    {('X', 0, 0): (getarray, 'X', (slice(0, 2), slice(0, 3))),
-     ('X', 1, 0): (getarray, 'X', (slice(2, 4), slice(0, 3))),
-     ('X', 1, 1): (getarray, 'X', (slice(2, 4), slice(3, 6))),
-     ('X', 0, 1): (getarray, 'X', (slice(0, 2), slice(3, 6)))}
+    {('X', 0, 0): (getter, 'X', (slice(0, 2), slice(0, 3))),
+     ('X', 1, 0): (getter, 'X', (slice(2, 4), slice(0, 3))),
+     ('X', 1, 1): (getter, 'X', (slice(2, 4), slice(3, 6))),
+     ('X', 0, 1): (getter, 'X', (slice(0, 2), slice(3, 6)))}
     """
     out_name = out_name or arr
     chunks = normalize_chunks(chunks, shape)
@@ -123,9 +138,10 @@ def getem(arr, chunks, getitem=getarray, shape=None, out_name=None, lock=False):
     keys = list(product([out_name], *[range(len(bds)) for bds in chunks]))
     slices = slices_from_chunks(chunks)
 
-    if lock:
-        values = [(getitem, arr, x, lock) for x in slices]
+    if not asarray or lock:
+        values = [(getitem, arr, x, asarray, lock) for x in slices]
     else:
+        # Common case, drop extra parameters
         values = [(getitem, arr, x) for x in slices]
 
     return dict(zip(keys, values))
@@ -473,7 +489,8 @@ def apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype=True):
     args = [np.ones((1,) * x.ndim, dtype=x.dtype)
             if isinstance(x, Array) else x for x in args]
     try:
-        o = func(*args, **kwargs)
+        with np.errstate(all='ignore'):
+            o = func(*args, **kwargs)
     except Exception as e:
         exc_type, exc_value, exc_traceback = sys.exc_info()
         tb = ''.join(traceback.format_tb(exc_traceback))
@@ -515,9 +532,13 @@ def map_blocks(func, *args, **kwargs):
     new_axis : number or iterable, optional
         New dimensions created by the function. Note that these are applied
         after ``drop_axis`` (if present).
+    token : string, optional
+        The key prefix to use for the output array. If not provided, will be
+        determined from the function name.
     name : string, optional
-        The key name to use for the array. If not provided, will be determined
-        by a hash of the arguments.
+        The key name to use for the output array. Note that this fully
+        specifies the output key name, and must be unique. If not provided,
+        will be determined by a hash of the arguments.
     **kwargs :
         Other keyword arguments to pass to function. Values must be constants
         (not dask.arrays)
@@ -590,10 +611,11 @@ def map_blocks(func, *args, **kwargs):
     >>> def func(block, block_id=None):
     ...     pass
 
-    You may specify the name of the resulting task in the graph with the
-    optional ``name`` keyword argument.
+    You may specify the key name prefix of the resulting task in the graph with
+    the optional ``token`` keyword argument.
 
-    >>> y = x.map_blocks(lambda x: x + 1, name='increment')
+    >>> x.map_blocks(lambda x: x + 1, token='increment')  # doctest: +SKIP
+    dask.array<increment, shape=(100,), dtype=int64, chunksize=(10,)>
     """
     if not callable(func):
         msg = ("First argument must be callable function, not %s\n"
@@ -601,7 +623,10 @@ def map_blocks(func, *args, **kwargs):
                "   or:   da.map_blocks(function, x, y, z)")
         raise TypeError(msg % type(func).__name__)
     name = kwargs.pop('name', None)
-    name = name or '%s-%s' % (funcname(func), tokenize(func, args, **kwargs))
+    token = kwargs.pop('token', None)
+    if not name:
+        name = '%s-%s' % (token or funcname(func),
+                          tokenize(token or func, args, **kwargs))
     dtype = kwargs.pop('dtype', None)
     chunks = kwargs.pop('chunks', None)
     drop_axis = kwargs.pop('drop_axis', [])
@@ -1019,6 +1044,34 @@ class Array(Base):
     def __len__(self):
         return sum(self.chunks[0])
 
+    def __array_ufunc__(self, numpy_ufunc, method, *inputs, **kwargs):
+        out = kwargs.get('out', ())
+        for x in inputs + out:
+            if not isinstance(x, (np.ndarray, Number, Array)):
+                return NotImplemented
+
+        if method == '__call__':
+            if numpy_ufunc.signature is not None:
+                return NotImplemented
+            if numpy_ufunc.nout > 1:
+                from . import ufunc
+                try:
+                    da_ufunc = getattr(ufunc, numpy_ufunc.__name__)
+                except AttributeError:
+                    return NotImplemented
+                return da_ufunc(*inputs, **kwargs)
+            else:
+                return elemwise(numpy_ufunc, *inputs, **kwargs)
+        elif method == 'outer':
+            from . import ufunc
+            try:
+                da_ufunc = getattr(ufunc, numpy_ufunc.__name__)
+            except AttributeError:
+                return NotImplemented
+            return da_ufunc.outer(*inputs, **kwargs)
+        else:
+            return NotImplemented
+
     def __repr__(self):
         """
 
@@ -1026,7 +1079,7 @@ class Array(Base):
         >>> da.ones((10, 10), chunks=(5, 5), dtype='i4')
         dask.array<..., shape=(10, 10), dtype=int32, chunksize=(5, 5)>
         """
-        chunksize = str(tuple(c[0] if c else 0 for c in self.chunks))
+        chunksize = str(tuple(c[0] for c in self.chunks))
         name = self.name.rsplit('-', 1)[0]
         return ("dask.array<%s, shape=%s, dtype=%s, chunksize=%s>" %
                 (name, self.shape, self.dtype, chunksize))
@@ -1119,85 +1172,6 @@ class Array(Base):
         from ..dataframe import from_dask_array
         return from_dask_array(self, columns=columns)
 
-    def cache(self, store=None, **kwargs):
-        """ Evaluate and cache array
-
-        Parameters
-        ----------
-        store: MutableMapping or ndarray-like
-            Place to put computed and cached chunks
-        kwargs:
-            Keyword arguments to pass on to ``get`` function for scheduling
-
-        Examples
-        --------
-
-        This triggers evaluation and store the result in either
-
-        1.  An ndarray object supporting setitem (see da.store)
-        2.  A MutableMapping like a dict or chest
-
-        It then returns a new dask array that points to this store.
-        This returns a semantically equivalent dask array.
-
-        >>> import dask.array as da
-        >>> x = da.arange(5, chunks=2)
-        >>> y = 2*x + 1
-        >>> z = y.cache()  # triggers computation
-
-        >>> y.compute()  # Does entire computation
-        array([1, 3, 5, 7, 9])
-
-        >>> z.compute()  # Just pulls from store
-        array([1, 3, 5, 7, 9])
-
-        You might base a cache off of an array like a numpy array or
-        h5py.Dataset.
-
-        >>> cache = np.empty(5, dtype=x.dtype)
-        >>> z = y.cache(store=cache)
-        >>> cache
-        array([1, 3, 5, 7, 9])
-
-        Or one might use a MutableMapping like a dict or chest
-
-        >>> cache = dict()
-        >>> z = y.cache(store=cache)
-        >>> cache  # doctest: +SKIP
-        {('x', 0): array([1, 3]),
-         ('x', 1): array([5, 7]),
-         ('x', 2): array([9])}
-        """
-        warnings.warn("Deprecation Warning: The `cache` method is deprecated, "
-                      "and will be removed in the next release. To achieve "
-                      "the same behavior, either write to disk or use "
-                      "`Client.persist`, from `dask.distributed`.")
-        if store is not None and hasattr(store, 'shape'):
-            self.store(store)
-            return from_array(store, chunks=self.chunks)
-        if store is None:
-            try:
-                from chest import Chest
-                store = Chest()
-            except ImportError:
-                if self.nbytes <= 1e9:
-                    store = dict()
-                else:
-                    msg = ("No out-of-core storage found."
-                           "Either:\n"
-                           "1. Install ``chest``, an out-of-core dictionary\n"
-                           "2. Provide an on-disk array like an h5py.Dataset")
-                    raise ValueError(msg)   # pragma: no cover
-        if isinstance(store, MutableMapping):
-            name = 'cache-' + tokenize(self)
-            dsk = dict(((name, k[1:]), (operator.setitem, store, (tuple, list(k)), k))
-                       for k in core.flatten(self._keys()))
-            Array._get(sharedict.merge(dsk, self.dask), list(dsk.keys()), **kwargs)
-
-            dsk2 = dict((k, (operator.getitem, store, (tuple, list(k))))
-                        for k in store)
-            return Array(dsk2, self.name, chunks=self.chunks, dtype=self.dtype)
-
     def __int__(self):
         return int(self.compute())
 
@@ -1479,68 +1453,101 @@ class Array(Base):
     def __rxor__(self, other):
         return elemwise(operator.xor, other, self)
 
+    def __matmul__(self, other):
+        if not hasattr(other, 'ndim'):
+            other = np.asarray(other)  # account for array-like RHS
+        if other.ndim > 2:
+            msg = ('The matrix multiplication operator (@) is not yet '
+                   'implemented for higher-dimensional Dask arrays. Try '
+                   '`dask.array.tensordot` and see the discussion at '
+                   'https://github.com/dask/dask/pull/2349 for details.')
+            raise NotImplementedError(msg)
+        return tensordot(self, other, axes=((self.ndim - 1,),
+                                            (other.ndim - 2,)))
+
+    def __rmatmul__(self, other):
+        if not hasattr(other, 'ndim'):
+            other = np.asarray(other)  # account for array-like on LHS
+        if self.ndim > 2:
+            msg = ('The matrix multiplication operator (@) is not yet '
+                   'implemented for higher-dimensional Dask arrays. Try '
+                   '`dask.array.tensordot` and see the discussion at '
+                   'https://github.com/dask/dask/pull/2349 for details.')
+            raise NotImplementedError(msg)
+        return tensordot(other, self, axes=((other.ndim - 1,),
+                                            (self.ndim - 2,)))
+
     @wraps(np.any)
-    def any(self, axis=None, keepdims=False, split_every=None):
+    def any(self, axis=None, keepdims=False, split_every=None, out=None):
         from .reductions import any
-        return any(self, axis=axis, keepdims=keepdims, split_every=split_every)
+        return any(self, axis=axis, keepdims=keepdims, split_every=split_every,
+                   out=out)
 
     @wraps(np.all)
-    def all(self, axis=None, keepdims=False, split_every=None):
+    def all(self, axis=None, keepdims=False, split_every=None, out=None):
         from .reductions import all
-        return all(self, axis=axis, keepdims=keepdims, split_every=split_every)
+        return all(self, axis=axis, keepdims=keepdims, split_every=split_every,
+                   out=out)
 
     @wraps(np.min)
-    def min(self, axis=None, keepdims=False, split_every=None):
+    def min(self, axis=None, keepdims=False, split_every=None, out=None):
         from .reductions import min
-        return min(self, axis=axis, keepdims=keepdims, split_every=split_every)
+        return min(self, axis=axis, keepdims=keepdims, split_every=split_every,
+                   out=out)
 
     @wraps(np.max)
-    def max(self, axis=None, keepdims=False, split_every=None):
+    def max(self, axis=None, keepdims=False, split_every=None, out=None):
         from .reductions import max
-        return max(self, axis=axis, keepdims=keepdims, split_every=split_every)
+        return max(self, axis=axis, keepdims=keepdims, split_every=split_every,
+                   out=out)
 
     @wraps(np.argmin)
-    def argmin(self, axis=None, split_every=None):
+    def argmin(self, axis=None, split_every=None, out=None):
         from .reductions import argmin
-        return argmin(self, axis=axis, split_every=split_every)
+        return argmin(self, axis=axis, split_every=split_every, out=out)
 
     @wraps(np.argmax)
-    def argmax(self, axis=None, split_every=None):
+    def argmax(self, axis=None, split_every=None, out=None):
         from .reductions import argmax
-        return argmax(self, axis=axis, split_every=split_every)
+        return argmax(self, axis=axis, split_every=split_every, out=out)
 
     @wraps(np.sum)
-    def sum(self, axis=None, dtype=None, keepdims=False, split_every=None):
+    def sum(self, axis=None, dtype=None, keepdims=False, split_every=None,
+            out=None):
         from .reductions import sum
         return sum(self, axis=axis, dtype=dtype, keepdims=keepdims,
-                   split_every=split_every)
+                   split_every=split_every, out=out)
 
     @wraps(np.prod)
-    def prod(self, axis=None, dtype=None, keepdims=False, split_every=None):
+    def prod(self, axis=None, dtype=None, keepdims=False, split_every=None,
+             out=None):
         from .reductions import prod
         return prod(self, axis=axis, dtype=dtype, keepdims=keepdims,
-                    split_every=split_every)
+                    split_every=split_every, out=out)
 
     @wraps(np.mean)
-    def mean(self, axis=None, dtype=None, keepdims=False, split_every=None):
+    def mean(self, axis=None, dtype=None, keepdims=False, split_every=None,
+             out=None):
         from .reductions import mean
         return mean(self, axis=axis, dtype=dtype, keepdims=keepdims,
-                    split_every=split_every)
+                    split_every=split_every, out=out)
 
     @wraps(np.std)
-    def std(self, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None):
+    def std(self, axis=None, dtype=None, keepdims=False, ddof=0,
+            split_every=None, out=None):
         from .reductions import std
         return std(self, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof,
-                   split_every=split_every)
+                   split_every=split_every, out=out)
 
     @wraps(np.var)
-    def var(self, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None):
+    def var(self, axis=None, dtype=None, keepdims=False, ddof=0,
+            split_every=None, out=None):
         from .reductions import var
         return var(self, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof,
-                   split_every=split_every)
+                   split_every=split_every, out=out)
 
     def moment(self, order, axis=None, dtype=None, keepdims=False, ddof=0,
-               split_every=None):
+               split_every=None, out=None):
         """Calculate the nth centralized moment.
 
         Parameters
@@ -1577,13 +1584,14 @@ class Array(Base):
 
         from .reductions import moment
         return moment(self, order, axis=axis, dtype=dtype, keepdims=keepdims,
-                      ddof=ddof, split_every=split_every)
+                      ddof=ddof, split_every=split_every, out=out)
 
-    def vnorm(self, ord=None, axis=None, keepdims=False, split_every=None):
+    def vnorm(self, ord=None, axis=None, keepdims=False, split_every=None,
+              out=None):
         """ Vector norm """
         from .reductions import vnorm
         return vnorm(self, ord=ord, axis=axis, keepdims=keepdims,
-                     split_every=split_every)
+                     split_every=split_every, out=out)
 
     @wraps(map_blocks)
     def map_blocks(self, func, *args, **kwargs):
@@ -1646,15 +1654,15 @@ class Array(Base):
         from .ghost import map_overlap
         return map_overlap(self, func, depth, boundary, trim, **kwargs)
 
-    def cumsum(self, axis, dtype=None):
+    def cumsum(self, axis, dtype=None, out=None):
         """ See da.cumsum for docstring """
         from .reductions import cumsum
-        return cumsum(self, axis, dtype)
+        return cumsum(self, axis, dtype, out=out)
 
-    def cumprod(self, axis, dtype=None):
+    def cumprod(self, axis, dtype=None, out=None):
         """ See da.cumprod for docstring """
         from .reductions import cumprod
-        return cumprod(self, axis, dtype)
+        return cumprod(self, axis, dtype, out=out)
 
     @wraps(squeeze)
     def squeeze(self):
@@ -1752,8 +1760,9 @@ class Array(Base):
         dask.array.from_delayed
         """
         from ..delayed import Delayed
-        return np.array(ndeepmap(self.ndim, lambda k: Delayed(k, self.dask), self._keys()),
-                        dtype=object)
+        dsk = self._optimize(self.dask, self._keys())
+        L = ndeepmap(self.ndim, lambda k: Delayed(k, dsk), self._keys())
+        return np.array(L, dtype=object)
 
     @wraps(np.repeat)
     def repeat(self, repeats, axis=None):
@@ -1786,7 +1795,7 @@ def normalize_chunks(chunks, shape=None):
     ((10, 10, 10), (5,))
 
     >>> normalize_chunks((), shape=(0, 0))  #  respects null dimensions
-    ((), ())
+    ((0,), (0,))
     """
     if chunks is None:
         raise ValueError(chunks_none_error_message)
@@ -1796,7 +1805,7 @@ def normalize_chunks(chunks, shape=None):
         if isinstance(chunks, Number):
             chunks = (chunks,) * len(shape)
     if not chunks and shape and all(s == 0 for s in shape):
-        chunks = ((),) * len(shape)
+        chunks = ((0,),) * len(shape)
 
     if shape and len(chunks) != len(shape):
         if not (len(shape) == 1 and sum(chunks) == shape[0]):
@@ -1811,11 +1820,16 @@ def normalize_chunks(chunks, shape=None):
         chunks = sum((blockdims_from_blockshape((s,), (c,))
                       if not isinstance(c, (tuple, list)) else (c,)
                       for s, c in zip(shape, chunks)), ())
+    for c in chunks:
+        if not c:
+            raise ValueError("Empty tuples are not allowed in chunks. Express "
+                             "zero length dimensions with 0(s) in chunks")
 
-    return tuple(map(tuple, chunks))
+    return tuple(tuple(int(x) if not math.isnan(x) else x for x in c) for c in chunks)
 
 
-def from_array(x, chunks, name=None, lock=False, fancy=True, getitem=None):
+def from_array(x, chunks, name=None, lock=False, asarray=True, fancy=True,
+               getitem=None):
     """ Create dask array from something that looks like an array
 
     Input must have a ``.shape`` and support numpy-style slicing.
@@ -1835,6 +1849,9 @@ def from_array(x, chunks, name=None, lock=False, fancy=True, getitem=None):
     lock : bool or Lock, optional
         If ``x`` doesn't support concurrent reads then provide a lock here, or
         pass in True to have dask.array create one for you.
+    asarray : bool, optional
+        If True (default), then chunks will be converted to instances of
+        ``ndarray``. Set to False to pass chunks through unchanged.
     fancy : bool, optional
         If ``x`` doesn't support fancy indexing (e.g. indexing with lists or
         arrays) then set to False. Default is True.
@@ -1871,12 +1888,12 @@ def from_array(x, chunks, name=None, lock=False, fancy=True, getitem=None):
         lock = SerializableLock()
 
     if getitem is None:
-        if fancy:
-            getitem = getarray
-        else:
-            getitem = getarray_nofancy
-    dsk = getem(original_name, chunks, getitem, out_name=name, lock=lock)
+        getitem = getter if fancy else getter_nofancy
+
+    dsk = getem(original_name, chunks, getitem=getitem, shape=x.shape,
+                out_name=name, lock=lock, asarray=asarray)
     dsk[original_name] = x
+
     return Array(dsk, name, chunks, dtype=x.dtype)
 
 
@@ -2564,19 +2581,32 @@ def asarray(array):
     --------
     >>> x = np.arange(3)
     >>> asarray(x)
-    dask.array<asarray, shape=(3,), dtype=int64, chunksize=(3,)>
+    dask.array<array, shape=(3,), dtype=int64, chunksize=(3,)>
     """
     if isinstance(array, Array):
         return array
-
-    name = 'asarray-' + tokenize(array)
     if not isinstance(getattr(array, 'shape', None), Iterable):
         array = np.asarray(array)
-    dsk = {(name,) + (0,) * len(array.shape):
-           (getarray_inline, name) + ((slice(None, None),) * len(array.shape),),
-           name: array}
-    chunks = tuple((d,) for d in array.shape)
-    return Array(dsk, name, chunks, dtype=array.dtype)
+    return from_array(array, chunks=array.shape, getitem=getter_inline)
+
+
+def asanyarray(array):
+    """Coerce argument into a dask array.
+
+    Subclasses of `np.ndarray` will be passed through as chunks unchanged.
+
+    Examples
+    --------
+    >>> x = np.arange(3)
+    >>> asanyarray(x)
+    dask.array<array, shape=(3,), dtype=int64, chunksize=(3,)>
+    """
+    if isinstance(array, Array):
+        return array
+    if not isinstance(getattr(array, 'shape', None), Iterable):
+        array = np.asanyarray(array)
+    return from_array(array, chunks=array.shape, getitem=getter_inline,
+                      asarray=False)
 
 
 def partial_by_order(*args, **kwargs):
@@ -2663,6 +2693,7 @@ def elemwise(op, *args, **kwargs):
     --------
     atop
     """
+    out = kwargs.pop('out', None)
     if not set(['name', 'dtype']).issuperset(kwargs):
         msg = "%s does not take the following keyword arguments %s"
         raise TypeError(msg % (op.__name__, str(sorted(set(kwargs) - set(['name', 'dtype'])))))
@@ -2695,14 +2726,48 @@ def elemwise(op, *args, **kwargs):
                                                   tokenize(op, dt, *args))
 
     if other:
-        return atop(partial_by_order, expr_inds,
-                    *concat((a, tuple(range(a.ndim)[::-1])) for a in arrays),
-                    dtype=dt, name=name, function=op, other=other,
-                    token=funcname(op))
+        result = atop(partial_by_order, expr_inds,
+                      *concat((a, tuple(range(a.ndim)[::-1])) for a in arrays),
+                      dtype=dt, name=name, function=op, other=other,
+                      token=funcname(op))
+    else:
+        result = atop(op, expr_inds,
+                      *concat((a, tuple(range(a.ndim)[::-1])) for a in arrays),
+                      dtype=dt, name=name)
+
+    return handle_out(out, result)
+
+
+def handle_out(out, result):
+    """ Handle out paramters
+
+    If out is a dask.array then this overwrites the contents of that array with
+    the result
+    """
+    if isinstance(out, tuple):
+        if len(out) == 1:
+            out = out[0]
+        elif len(out) > 1:
+            raise NotImplementedError("The out parameter is not fully supported")
+        else:
+            out = None
+    if isinstance(out, Array):
+        if out.shape != result.shape:
+            raise NotImplementedError(
+                "Mismatched shapes between result and out parameter. "
+                "out=%s, result=%s" % (str(out.shape), str(result.shape)))
+        out._chunks = result.chunks
+        if hasattr(out, '_cached_keys'):
+            del out._cached_keys
+        out.dask = result.dask
+        out.dtype = result.dtype
+        out.name = result.name
+    elif out is not None:
+        msg = ("The out parameter is not fully supported."
+               " Received type %s, expected Dask Array" % type(out).__name__)
+        raise NotImplementedError(msg)
     else:
-        return atop(op, expr_inds,
-                    *concat((a, tuple(range(a.ndim)[::-1])) for a in arrays),
-                    dtype=dt, name=name)
+        return result
 
 
 @wraps(np.around)
@@ -2767,7 +2832,23 @@ See the following documentation page for details:
 def where(condition, x=None, y=None):
     if x is None or y is None:
         raise TypeError(where_error_message)
-    return choose(condition, [y, x])
+
+    x = asarray(x)
+    y = asarray(y)
+
+    shape = broadcast_shapes(x.shape, y.shape)
+    dtype = np.promote_types(x.dtype, y.dtype)
+
+    x = broadcast_to(x, shape).astype(dtype)
+    y = broadcast_to(y, shape).astype(dtype)
+
+    if isinstance(condition, (bool, np.bool8)):
+        if condition:
+            return x
+        else:
+            return y
+    else:
+        return choose(condition, [y, x])
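
The rewritten ``where`` above broadcasts ``x`` and ``y`` to a common shape and
dtype and short-circuits plain boolean conditions. A brief sketch, assuming
the behavior of the hunk shown here:

    import dask.array as da

    x = da.arange(4, chunks=2)
    y = -x
    a = da.where(True, x, y)   # scalar bool condition returns x directly
    b = da.where(x > 1, x, y)  # elementwise selection on the boolean mask
    assert (a.compute() == x.compute()).all()
    assert (b.compute() == [0, -1, 2, 3]).all()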
 
 
... 11869 lines suppressed ...
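
For reference, three of the user-facing dask.array additions in this release
can be exercised as below. This is a minimal sketch, assuming dask 0.15.1 on
Python 3.5+ with NumPy >= 1.13; note that ``handle_out`` (shown earlier)
rebinds the graph of ``out`` in place, so the sketch inspects ``out`` itself
rather than a return value:

    import numpy as np
    import dask.array as da

    x = da.ones((4, 4), chunks=(2, 2))

    # out= on reductions: handle_out overwrites out's graph in place
    out = da.zeros(4, chunks=2)
    x.sum(axis=0, out=out)
    assert (out.compute() == 4).all()

    # The @ operator, routed through tensordot for 1-D/2-D operands
    y = da.ones((4, 3), chunks=(2, 3))
    z = x @ y
    assert z.shape == (4, 3)

    # NumPy ufuncs now dispatch back to dask via __array_ufunc__
    s = np.sin(x)
    assert isinstance(s, da.Array)
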

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/dask.git


