[Python-modules-commits] [dask] 02/08: New upstream version 0.15.1
Diane Trout
diane@moszumanska.debian.org
Wed Jul 26 22:02:03 UTC 2017
This is an automated email from the git hooks/post-receive script.
diane pushed a commit to branch master
in repository dask.
commit f5cbe3f67f4a1a20e0f3979a738d809300acf0c1
Author: Diane Trout <diane@ghic.org>
Date: Thu Jul 20 12:04:48 2017 -0700
New upstream version 0.15.1
---
.travis.yml | 10 +-
MANIFEST.in | 4 +-
continuous_integration/travis/install.sh | 26 +-
continuous_integration/travis/run_tests.sh | 2 +
dask/__init__.py | 8 +-
dask/array/__init__.py | 18 +-
dask/array/core.py | 492 +++++++++-------
dask/array/fft.py | 67 +++
dask/array/ghost.py | 4 +-
dask/array/optimization.py | 121 ++--
dask/array/percentile.py | 4 -
dask/array/random.py | 2 +-
dask/array/reductions.py | 149 +++--
dask/array/slicing.py | 206 ++++---
dask/array/stats.py | 432 ++++++++++++++
dask/array/tests/test_array_core.py | 224 +++++--
dask/array/tests/test_fft.py | 79 +++
dask/array/tests/test_ghost.py | 6 +-
dask/array/tests/test_learn.py | 5 +-
dask/array/tests/test_linalg.py | 56 +-
dask/array/tests/test_optimization.py | 154 +++--
dask/array/tests/test_percentiles.py | 3 +-
dask/array/tests/test_reductions.py | 71 ++-
dask/array/tests/test_slicing.py | 75 ++-
dask/array/tests/test_sparse.py | 16 +-
dask/array/tests/test_stats.py | 135 +++++
dask/array/tests/test_ufunc.py | 220 +++++--
dask/array/ufunc.py | 232 +++++---
dask/array/utils.py | 2 +-
dask/array/wrap.py | 12 -
dask/async.py | 648 +--------------------
dask/bag/__init__.py | 3 +-
dask/bag/core.py | 625 +++++++++++++++-----
dask/bag/tests/test_bag.py | 248 ++++++--
dask/bag/text.py | 2 +-
dask/base.py | 34 +-
dask/bytes/core.py | 48 +-
dask/bytes/local.py | 8 +-
dask/bytes/s3.py | 6 +-
dask/bytes/tests/test_bytes_utils.py | 62 +-
dask/bytes/tests/test_local.py | 31 +
dask/bytes/utils.py | 53 +-
dask/callbacks.py | 18 +
dask/compatibility.py | 15 +
dask/dataframe/core.py | 85 +--
dask/dataframe/groupby.py | 18 +-
dask/dataframe/io/csv.py | 63 +-
dask/dataframe/io/demo.py | 6 +-
dask/dataframe/io/hdf.py | 14 +-
dask/dataframe/io/io.py | 13 +-
dask/dataframe/io/parquet.py | 118 ++--
dask/dataframe/io/tests/test_csv.py | 48 +-
dask/dataframe/io/tests/test_demo.py | 2 +
dask/dataframe/io/tests/test_hdf.py | 6 +-
dask/dataframe/io/tests/test_io.py | 15 +-
dask/dataframe/io/tests/test_parquet.py | 87 ++-
dask/dataframe/methods.py | 14 +-
dask/dataframe/multi.py | 2 +
dask/dataframe/shuffle.py | 23 +
dask/dataframe/tests/test_arithmetics_reduction.py | 12 +-
dask/dataframe/tests/test_categorical.py | 6 +-
dask/dataframe/tests/test_dataframe.py | 106 +++-
dask/dataframe/tests/test_groupby.py | 113 ++--
dask/dataframe/tests/test_hyperloglog.py | 4 +-
dask/dataframe/tests/test_indexing.py | 14 +
dask/dataframe/tests/test_multi.py | 51 +-
dask/dataframe/tests/test_rolling.py | 47 +-
dask/dataframe/tests/test_shuffle.py | 31 +-
dask/dataframe/tests/test_ufunc.py | 128 ++--
dask/dataframe/tests/test_utils_dataframe.py | 12 +-
dask/dataframe/utils.py | 54 +-
dask/delayed.py | 48 +-
dask/diagnostics/tests/test_progress.py | 2 +-
dask/distributed.py | 4 +-
dask/hashing.py | 102 ++++
dask/{async.py => local.py} | 251 +++-----
dask/multiprocessing.py | 96 ++-
dask/tests/test_base.py | 19 +-
dask/tests/test_cache.py | 2 +-
dask/tests/test_callbacks.py | 35 +-
dask/tests/test_delayed.py | 16 +-
dask/tests/test_distributed.py | 4 +-
dask/tests/test_hashing.py | 45 ++
dask/tests/{test_async.py => test_local.py} | 20 +-
dask/tests/test_multiprocessing.py | 14 +-
dask/tests/test_sizeof.py | 9 +-
dask/tests/test_threaded.py | 5 +-
dask/tests/test_utils.py | 197 +++----
dask/threaded.py | 19 +-
dask/utils.py | 460 +++++----------
docs/release-procedure.md | 9 +
docs/requirements-docs.txt | 3 +-
docs/source/array-api.rst | 40 ++
docs/source/array-creation.rst | 2 +-
docs/source/array-overview.rst | 4 +-
docs/source/array-stats.rst | 36 ++
docs/source/array.rst | 1 +
docs/source/bag-api.rst | 13 +-
docs/source/changelog.rst | 44 ++
docs/source/conf.py | 2 +-
docs/source/dataframe-api.rst | 1 -
docs/source/dataframe-create.rst | 2 +-
docs/source/dataframe-design.rst | 2 +-
docs/source/debugging.rst | 2 +-
docs/source/diagnostics.rst | 7 +-
docs/source/examples-tutorials.rst | 2 +
docs/source/examples/array-extend.rst | 4 +-
docs/source/faq.rst | 10 +-
docs/source/futures.rst | 525 +++++++++++++++++
docs/source/index.rst | 12 +-
docs/source/install.rst | 18 +-
docs/source/remote-data-services.rst | 2 +-
docs/source/scheduler-choice.rst | 27 +-
docs/source/scheduler-overview.rst | 6 +-
docs/source/scheduling-policy.rst | 7 +-
docs/source/scripts/scheduling.py | 6 +-
docs/source/shared.rst | 4 +-
setup.py | 4 +-
118 files changed, 5081 insertions(+), 2790 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index 911d6f7..ea6c0bc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,14 +1,14 @@
language: generic
sudo: false
-os: linux
+dist: trusty
env:
matrix:
- - PYTHON=2.7 NUMPY=1.10.4 PANDAS=0.19.0 COVERAGE='true' PARALLEL='false' TEST_IMPORTS='false' XTRATESTARGS=
- - PYTHON=2.7 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
+ - PYTHON=2.7 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='true' PARALLEL='false' TEST_IMPORTS='false' XTRATESTARGS=
+ - PYTHON=2.7 NUMPY=1.13.0 PANDAS=0.20.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
- PYTHON=3.4 NUMPY=1.10.4 PANDAS=0.19.1 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
- - PYTHON=3.5 NUMPY=1.11.3 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' XTRATESTARGS=
- - PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
+ - PYTHON=3.5 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' XTRATESTARGS=
+ - PYTHON=3.6 NUMPY=1.13.0 PANDAS=0.20.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
matrix:
fast_finish: true
diff --git a/MANIFEST.in b/MANIFEST.in
index fbd1ed7..c0dbaa0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,11 +1,11 @@
recursive-include dask *.py
-recursive-include docs *.rst
+recursive-include docs/source *
+include docs/Makefile docs/make.bat
include setup.py
include README.rst
include LICENSE.txt
include MANIFEST.in
-prune docs/_build
include versioneer.py
include dask/_version.py
diff --git a/continuous_integration/travis/install.sh b/continuous_integration/travis/install.sh
index 37aedcf..d3bb215 100644
--- a/continuous_integration/travis/install.sh
+++ b/continuous_integration/travis/install.sh
@@ -26,44 +26,38 @@ echo "numpy $NUMPY" >> $CONDA_PREFIX/conda-meta/pinned
echo "pandas $PANDAS" >> $CONDA_PREFIX/conda-meta/pinned
# Install dependencies.
-# XXX: Due to a weird conda dependency resolution issue, we need to install
-# dependencies in two separate calls, otherwise we sometimes get version
-# incompatible with the installed version of numpy leading to crashes. This
-# seems to have to do with differences between conda-forge and defaults.
conda install -q -c conda-forge \
numpy \
pandas \
bcolz \
blosc \
bokeh \
+ boto3 \
chest \
+ cloudpickle \
coverage \
cytoolz \
+ distributed \
graphviz \
h5py \
ipython \
partd \
psutil \
- pyarrow \
pytables \
- pytest \
+ "pytest<=3.1.1" \
scikit-image \
scikit-learn \
scipy \
sqlalchemy \
toolz
-# Specify numpy/pandas here to prevent upgrade/downgrade
-conda install -q -c conda-forge \
- distributed \
- cloudpickle \
-
pip install -q git+https://github.com/dask/partd --upgrade --no-deps
pip install -q git+https://github.com/dask/zict --upgrade --no-deps
pip install -q git+https://github.com/dask/distributed --upgrade --no-deps
pip install -q git+https://github.com/mrocklin/sparse --upgrade --no-deps
+pip install -q git+https://github.com/dask/s3fs --upgrade --no-deps
-if [[ $PYTHONOPTIMIZE != '2' ]]; then
+if [[ $PYTHONOPTIMIZE != '2' ]] && [[ $NUMPY > '1.11.0' ]] && [[ $NUMPY < '1.13.0' ]]; then
conda install -q -c conda-forge numba cython
pip install -q git+https://github.com/dask/fastparquet
fi
@@ -76,12 +70,18 @@ pip install -q \
cachey \
graphviz \
moto \
+ pyarrow \
--upgrade --no-deps
pip install -q \
+ cityhash \
flake8 \
+ mmh3 \
pandas_datareader \
- pytest-xdist
+ pytest-xdist \
+ xxhash
# Install dask
pip install -q --no-deps -e .[complete]
+echo pip freeze
+pip freeze
diff --git a/continuous_integration/travis/run_tests.sh b/continuous_integration/travis/run_tests.sh
index 54bd62b..6c8c9c9 100644
--- a/continuous_integration/travis/run_tests.sh
+++ b/continuous_integration/travis/run_tests.sh
@@ -6,7 +6,9 @@ if [[ $PARALLEL == 'true' ]]; then
fi
if [[ $COVERAGE == 'true' ]]; then
+ echo "coverage run `which py.test` dask --runslow --doctest-modules --verbose $XTRATESTARGS"
coverage run `which py.test` dask --runslow --doctest-modules --verbose $XTRATESTARGS
else
+ echo "py.test dask --runslow --verbose $XTRATESTARGS"
py.test dask --runslow --verbose $XTRATESTARGS
fi
diff --git a/dask/__init__.py b/dask/__init__.py
index 30a3045..d81af59 100644
--- a/dask/__init__.py
+++ b/dask/__init__.py
@@ -2,9 +2,9 @@ from __future__ import absolute_import, division, print_function
from .core import istask
from .context import set_options
-from .async import get_sync as get
+from .local import get_sync as get
try:
- from .delayed import do, delayed
+ from .delayed import delayed
except ImportError:
pass
try:
@@ -12,6 +12,10 @@ try:
except ImportError:
pass
+# dask.async is deprecated. For now we import it to the top namespace to be
+# compatible with prior releases. This should be removed in a future release:
+import dask.async
+
from ._version import get_versions
versions = get_versions()
__version__ = versions['version']
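As a rough sketch of the compatibility shim above (assuming dask.async simply forwards to dask.local, per the comment), both import paths keep working for now:
>>> import dask.local            # new home of the synchronous scheduler
>>> import dask.async            # deprecated alias; may emit a deprecation warning  # doctest: +SKIP
>>> dask.local.get_sync({'x': (sum, [1, 2])}, 'x')
3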
diff --git a/dask/array/__init__.py b/dask/array/__init__.py
index 22270ac..483dbd3 100644
--- a/dask/array/__init__.py
+++ b/dask/array/__init__.py
@@ -6,17 +6,21 @@ from .core import (Array, stack, concatenate, take, tensordot, transpose,
roll, fromfunction, unique, store, squeeze, topk, bincount, tile,
digitize, histogram, map_blocks, atop, to_hdf5, dot, cov, array,
dstack, vstack, hstack, to_npy_stack, from_npy_stack, compress,
- from_delayed, round, swapaxes, repeat, asarray)
+ from_delayed, round, swapaxes, repeat, asarray, asanyarray)
from .core import (around, isnull, notnull, isclose, eye, triu,
tril, diag, corrcoef)
from .reshape import reshape
-from .ufunc import (logaddexp, logaddexp2, conj, exp, log, log2, log10, log1p,
- expm1, sqrt, square, sin, cos, tan, arcsin, arccos, arctan, arctan2,
- hypot, sinh, cosh, tanh, arcsinh, arccosh, arctanh, deg2rad, rad2deg,
- logical_and, logical_or, logical_xor, logical_not, maximum, minimum,
+from .ufunc import (add, subtract, multiply, divide, logaddexp, logaddexp2,
+ true_divide, floor_divide, negative, power, remainder, mod, conj, exp,
+ exp2, log, log2, log10, log1p, expm1, sqrt, square, cbrt, reciprocal,
+ sin, cos, tan, arcsin, arccos, arctan, arctan2, hypot, sinh, cosh,
+ tanh, arcsinh, arccosh, arctanh, deg2rad, rad2deg, greater,
+ greater_equal, less, less_equal, not_equal, equal, logical_and,
+ logical_or, logical_xor, logical_not, maximum, minimum,
fmax, fmin, isreal, iscomplex, isfinite, isinf, isnan, signbit,
- copysign, nextafter, ldexp, fmod, floor, ceil, trunc, degrees, radians,
- rint, fix, angle, real, imag, clip, fabs, sign, absolute, frexp, modf)
+ copysign, nextafter, spacing, ldexp, fmod, floor, ceil, trunc, degrees,
+ radians, rint, fix, angle, real, imag, clip, fabs, sign, absolute,
+ i0, sinc, nan_to_num, frexp, modf, divide)
from .reductions import (sum, prod, mean, std, var, any, all, min, max, vnorm,
moment,
argmin, argmax,
diff --git a/dask/array/core.py b/dask/array/core.py
index d9d47f4..d7d5ac3 100644
--- a/dask/array/core.py
+++ b/dask/array/core.py
@@ -1,11 +1,12 @@
from __future__ import absolute_import, division, print_function
from bisect import bisect
-from collections import Iterable, MutableMapping, Mapping
+from collections import Iterable, Mapping
from collections import Iterator
from functools import partial, wraps
import inspect
from itertools import product
+import math
from numbers import Integral, Number
import operator
from operator import add, getitem, mul
@@ -44,23 +45,18 @@ from .. import sharedict
from ..sharedict import ShareDict
-def getarray(a, b, lock=None):
- """ Mimics getitem but includes call to np.asarray
-
- >>> getarray([1, 2, 3, 4, 5], slice(1, 4))
- array([2, 3, 4])
- """
+def getter(a, b, asarray=True, lock=None):
if isinstance(b, tuple) and any(x is None for x in b):
b2 = tuple(x for x in b if x is not None)
b3 = tuple(None if x is None else slice(None, None)
for x in b if not isinstance(x, (int, long)))
- return getarray(a, b2, lock)[b3]
+ return getter(a, b2, asarray=asarray, lock=lock)[b3]
if lock:
lock.acquire()
try:
c = a[b]
- if type(c) != np.ndarray:
+ if asarray:
c = np.asarray(c)
finally:
if lock:
@@ -68,17 +64,35 @@ def getarray(a, b, lock=None):
return c
-def getarray_nofancy(a, b, lock=None):
- """ A simple wrapper around ``getarray``.
+def getter_nofancy(a, b, asarray=True, lock=None):
+ """ A simple wrapper around ``getter``.
Used to indicate to the optimization passes that the backend doesn't
- support "fancy indexing"
+ support fancy indexing.
"""
- return getarray(a, b, lock=lock)
+ return getter(a, b, asarray=asarray, lock=lock)
+
+
+def getter_inline(a, b, asarray=True, lock=None):
+ """ A getter function that optimizations feel comfortable inlining
+
+ Slicing operations with this function may be inlined into a graph, such as
+ in the following rewrite
+ **Before**
-def getarray_inline(a, b, lock=None):
- return getarray(a, b, lock=lock)
+ >>> a = x[:10] # doctest: +SKIP
+ >>> b = a + 1 # doctest: +SKIP
+ >>> c = a * 2 # doctest: +SKIP
+
+ **After**
+
+ >>> b = x[:10] + 1 # doctest: +SKIP
+ >>> c = x[:10] * 2 # doctest: +SKIP
+
+ This inlining can be relevant to operations when running off of disk.
+ """
+ return getter(a, b, asarray=asarray, lock=lock)
from .optimization import optimize, fuse_slice
@@ -102,20 +116,21 @@ def slices_from_chunks(chunks):
for start, shape in zip(starts, shapes)]
-def getem(arr, chunks, getitem=getarray, shape=None, out_name=None, lock=False):
+def getem(arr, chunks, getitem=getter, shape=None, out_name=None, lock=False,
+ asarray=True):
""" Dask getting various chunks from an array-like
>>> getem('X', chunks=(2, 3), shape=(4, 6)) # doctest: +SKIP
- {('X', 0, 0): (getarray, 'X', (slice(0, 2), slice(0, 3))),
- ('X', 1, 0): (getarray, 'X', (slice(2, 4), slice(0, 3))),
- ('X', 1, 1): (getarray, 'X', (slice(2, 4), slice(3, 6))),
- ('X', 0, 1): (getarray, 'X', (slice(0, 2), slice(3, 6)))}
+ {('X', 0, 0): (getter, 'X', (slice(0, 2), slice(0, 3))),
+ ('X', 1, 0): (getter, 'X', (slice(2, 4), slice(0, 3))),
+ ('X', 1, 1): (getter, 'X', (slice(2, 4), slice(3, 6))),
+ ('X', 0, 1): (getter, 'X', (slice(0, 2), slice(3, 6)))}
>>> getem('X', chunks=((2, 2), (3, 3))) # doctest: +SKIP
- {('X', 0, 0): (getarray, 'X', (slice(0, 2), slice(0, 3))),
- ('X', 1, 0): (getarray, 'X', (slice(2, 4), slice(0, 3))),
- ('X', 1, 1): (getarray, 'X', (slice(2, 4), slice(3, 6))),
- ('X', 0, 1): (getarray, 'X', (slice(0, 2), slice(3, 6)))}
+ {('X', 0, 0): (getter, 'X', (slice(0, 2), slice(0, 3))),
+ ('X', 1, 0): (getter, 'X', (slice(2, 4), slice(0, 3))),
+ ('X', 1, 1): (getter, 'X', (slice(2, 4), slice(3, 6))),
+ ('X', 0, 1): (getter, 'X', (slice(0, 2), slice(3, 6)))}
"""
out_name = out_name or arr
chunks = normalize_chunks(chunks, shape)
@@ -123,9 +138,10 @@ def getem(arr, chunks, getitem=getarray, shape=None, out_name=None, lock=False):
keys = list(product([out_name], *[range(len(bds)) for bds in chunks]))
slices = slices_from_chunks(chunks)
- if lock:
- values = [(getitem, arr, x, lock) for x in slices]
+ if not asarray or lock:
+ values = [(getitem, arr, x, asarray, lock) for x in slices]
else:
+ # Common case, drop extra parameters
values = [(getitem, arr, x) for x in slices]
return dict(zip(keys, values))
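For illustration, the branch above keeps task tuples short in the common case: the extra ``asarray``/``lock`` arguments appear only when they are needed (a sketch; dictionary ordering may differ):
>>> getem('X', chunks=(2,), shape=(4,))  # doctest: +SKIP
{('X', 0): (getter, 'X', (slice(0, 2),)),
 ('X', 1): (getter, 'X', (slice(2, 4),))}
>>> getem('X', chunks=(2,), shape=(4,), asarray=False)  # doctest: +SKIP
{('X', 0): (getter, 'X', (slice(0, 2),), False, False),
 ('X', 1): (getter, 'X', (slice(2, 4),), False, False)}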
@@ -473,7 +489,8 @@ def apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype=True):
args = [np.ones((1,) * x.ndim, dtype=x.dtype)
if isinstance(x, Array) else x for x in args]
try:
- o = func(*args, **kwargs)
+ with np.errstate(all='ignore'):
+ o = func(*args, **kwargs)
except Exception as e:
exc_type, exc_value, exc_traceback = sys.exc_info()
tb = ''.join(traceback.format_tb(exc_traceback))
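For illustration, the ``np.errstate(all='ignore')`` context added above suppresses spurious floating-point warnings while the dtype is inferred from a one-element trial call (a sketch of the idiom):
>>> import numpy as np
>>> with np.errstate(all='ignore'):
...     np.log(np.zeros(1))      # would otherwise warn: divide by zero
array([-inf])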
@@ -515,9 +532,13 @@ def map_blocks(func, *args, **kwargs):
new_axis : number or iterable, optional
New dimensions created by the function. Note that these are applied
after ``drop_axis`` (if present).
+ token : string, optional
+ The key prefix to use for the output array. If not provided, will be
+ determined from the function name.
name : string, optional
- The key name to use for the array. If not provided, will be determined
- by a hash of the arguments.
+ The key name to use for the output array. Note that this fully
+ specifies the output key name, and must be unique. If not provided,
+ will be determined by a hash of the arguments.
**kwargs :
Other keyword arguments to pass to function. Values must be constants
(not dask.arrays)
@@ -590,10 +611,11 @@ def map_blocks(func, *args, **kwargs):
>>> def func(block, block_id=None):
... pass
- You may specify the name of the resulting task in the graph with the
- optional ``name`` keyword argument.
+ You may specify the key name prefix of the resulting task in the graph with
+ the optional ``token`` keyword argument.
- >>> y = x.map_blocks(lambda x: x + 1, name='increment')
+ >>> x.map_blocks(lambda x: x + 1, token='increment') # doctest: +SKIP
+ dask.array<increment, shape=(100,), dtype=int64, chunksize=(10,)>
"""
if not callable(func):
msg = ("First argument must be callable function, not %s\n"
@@ -601,7 +623,10 @@ def map_blocks(func, *args, **kwargs):
" or: da.map_blocks(function, x, y, z)")
raise TypeError(msg % type(func).__name__)
name = kwargs.pop('name', None)
- name = name or '%s-%s' % (funcname(func), tokenize(func, args, **kwargs))
+ token = kwargs.pop('token', None)
+ if not name:
+ name = '%s-%s' % (token or funcname(func),
+ tokenize(token or func, args, **kwargs))
dtype = kwargs.pop('dtype', None)
chunks = kwargs.pop('chunks', None)
drop_axis = kwargs.pop('drop_axis', [])
@@ -1019,6 +1044,34 @@ class Array(Base):
def __len__(self):
return sum(self.chunks[0])
+ def __array_ufunc__(self, numpy_ufunc, method, *inputs, **kwargs):
+ out = kwargs.get('out', ())
+ for x in inputs + out:
+ if not isinstance(x, (np.ndarray, Number, Array)):
+ return NotImplemented
+
+ if method == '__call__':
+ if numpy_ufunc.signature is not None:
+ return NotImplemented
+ if numpy_ufunc.nout > 1:
+ from . import ufunc
+ try:
+ da_ufunc = getattr(ufunc, numpy_ufunc.__name__)
+ except AttributeError:
+ return NotImplemented
+ return da_ufunc(*inputs, **kwargs)
+ else:
+ return elemwise(numpy_ufunc, *inputs, **kwargs)
+ elif method == 'outer':
+ from . import ufunc
+ try:
+ da_ufunc = getattr(ufunc, numpy_ufunc.__name__)
+ except AttributeError:
+ return NotImplemented
+ return da_ufunc.outer(*inputs, **kwargs)
+ else:
+ return NotImplemented
+
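As a rough sketch of what the new ``__array_ufunc__`` protocol enables (assuming NumPy >= 1.13, where the protocol is honored), NumPy ufuncs applied to dask arrays now stay lazy instead of triggering computation:
>>> import numpy as np
>>> import dask.array as da
>>> x = da.ones(10, chunks=5)
>>> isinstance(np.sin(x), da.Array)   # single-output ufuncs go via elemwise
True
>>> np.frexp(x)                       # nout > 1 routes through dask.array.ufunc  # doctest: +SKIP
>>> np.add.outer(x, x)                # the 'outer' method also dispatches  # doctest: +SKIP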
def __repr__(self):
"""
@@ -1026,7 +1079,7 @@ class Array(Base):
>>> da.ones((10, 10), chunks=(5, 5), dtype='i4')
dask.array<..., shape=(10, 10), dtype=int32, chunksize=(5, 5)>
"""
- chunksize = str(tuple(c[0] if c else 0 for c in self.chunks))
+ chunksize = str(tuple(c[0] for c in self.chunks))
name = self.name.rsplit('-', 1)[0]
return ("dask.array<%s, shape=%s, dtype=%s, chunksize=%s>" %
(name, self.shape, self.dtype, chunksize))
@@ -1119,85 +1172,6 @@ class Array(Base):
from ..dataframe import from_dask_array
return from_dask_array(self, columns=columns)
- def cache(self, store=None, **kwargs):
- """ Evaluate and cache array
-
- Parameters
- ----------
- store: MutableMapping or ndarray-like
- Place to put computed and cached chunks
- kwargs:
- Keyword arguments to pass on to ``get`` function for scheduling
-
- Examples
- --------
-
- This triggers evaluation and store the result in either
-
- 1. An ndarray object supporting setitem (see da.store)
- 2. A MutableMapping like a dict or chest
-
- It then returns a new dask array that points to this store.
- This returns a semantically equivalent dask array.
-
- >>> import dask.array as da
- >>> x = da.arange(5, chunks=2)
- >>> y = 2*x + 1
- >>> z = y.cache() # triggers computation
-
- >>> y.compute() # Does entire computation
- array([1, 3, 5, 7, 9])
-
- >>> z.compute() # Just pulls from store
- array([1, 3, 5, 7, 9])
-
- You might base a cache off of an array like a numpy array or
- h5py.Dataset.
-
- >>> cache = np.empty(5, dtype=x.dtype)
- >>> z = y.cache(store=cache)
- >>> cache
- array([1, 3, 5, 7, 9])
-
- Or one might use a MutableMapping like a dict or chest
-
- >>> cache = dict()
- >>> z = y.cache(store=cache)
- >>> cache # doctest: +SKIP
- {('x', 0): array([1, 3]),
- ('x', 1): array([5, 7]),
- ('x', 2): array([9])}
- """
- warnings.warn("Deprecation Warning: The `cache` method is deprecated, "
- "and will be removed in the next release. To achieve "
- "the same behavior, either write to disk or use "
- "`Client.persist`, from `dask.distributed`.")
- if store is not None and hasattr(store, 'shape'):
- self.store(store)
- return from_array(store, chunks=self.chunks)
- if store is None:
- try:
- from chest import Chest
- store = Chest()
- except ImportError:
- if self.nbytes <= 1e9:
- store = dict()
- else:
- msg = ("No out-of-core storage found."
- "Either:\n"
- "1. Install ``chest``, an out-of-core dictionary\n"
- "2. Provide an on-disk array like an h5py.Dataset")
- raise ValueError(msg) # pragma: no cover
- if isinstance(store, MutableMapping):
- name = 'cache-' + tokenize(self)
- dsk = dict(((name, k[1:]), (operator.setitem, store, (tuple, list(k)), k))
- for k in core.flatten(self._keys()))
- Array._get(sharedict.merge(dsk, self.dask), list(dsk.keys()), **kwargs)
-
- dsk2 = dict((k, (operator.getitem, store, (tuple, list(k))))
- for k in store)
- return Array(dsk2, self.name, chunks=self.chunks, dtype=self.dtype)
-
def __int__(self):
return int(self.compute())
@@ -1479,68 +1453,101 @@ class Array(Base):
def __rxor__(self, other):
return elemwise(operator.xor, other, self)
+ def __matmul__(self, other):
+ if not hasattr(other, 'ndim'):
+ other = np.asarray(other) # account for array-like RHS
+ if other.ndim > 2:
+ msg = ('The matrix multiplication operator (@) is not yet '
+ 'implemented for higher-dimensional Dask arrays. Try '
+ '`dask.array.tensordot` and see the discussion at '
+ 'https://github.com/dask/dask/pull/2349 for details.')
+ raise NotImplementedError(msg)
+ return tensordot(self, other, axes=((self.ndim - 1,),
+ (other.ndim - 2,)))
+
+ def __rmatmul__(self, other):
+ if not hasattr(other, 'ndim'):
+ other = np.asarray(other) # account for array-like on LHS
+ if self.ndim > 2:
+ msg = ('The matrix multiplication operator (@) is not yet '
+ 'implemented for higher-dimensional Dask arrays. Try '
+ '`dask.array.tensordot` and see the discussion at '
+ 'https://github.com/dask/dask/pull/2349 for details.')
+ raise NotImplementedError(msg)
+ return tensordot(other, self, axes=((other.ndim - 1,),
+ (self.ndim - 2,)))
+
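For illustration (Python 3.5+, where the ``@`` operator exists), matrix multiplication on one- and two-dimensional dask arrays now reduces to ``tensordot`` over the inner axes, while higher-dimensional operands raise ``NotImplementedError`` as shown above:
>>> import dask.array as da
>>> a = da.ones((4, 3), chunks=2)
>>> b = da.ones((3, 5), chunks=2)
>>> (a @ b).shape                     # same as da.tensordot(a, b, axes=((1,), (0,)))
(4, 5)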
@wraps(np.any)
- def any(self, axis=None, keepdims=False, split_every=None):
+ def any(self, axis=None, keepdims=False, split_every=None, out=None):
from .reductions import any
- return any(self, axis=axis, keepdims=keepdims, split_every=split_every)
+ return any(self, axis=axis, keepdims=keepdims, split_every=split_every,
+ out=out)
@wraps(np.all)
- def all(self, axis=None, keepdims=False, split_every=None):
+ def all(self, axis=None, keepdims=False, split_every=None, out=None):
from .reductions import all
- return all(self, axis=axis, keepdims=keepdims, split_every=split_every)
+ return all(self, axis=axis, keepdims=keepdims, split_every=split_every,
+ out=out)
@wraps(np.min)
- def min(self, axis=None, keepdims=False, split_every=None):
+ def min(self, axis=None, keepdims=False, split_every=None, out=None):
from .reductions import min
- return min(self, axis=axis, keepdims=keepdims, split_every=split_every)
+ return min(self, axis=axis, keepdims=keepdims, split_every=split_every,
+ out=out)
@wraps(np.max)
- def max(self, axis=None, keepdims=False, split_every=None):
+ def max(self, axis=None, keepdims=False, split_every=None, out=None):
from .reductions import max
- return max(self, axis=axis, keepdims=keepdims, split_every=split_every)
+ return max(self, axis=axis, keepdims=keepdims, split_every=split_every,
+ out=out)
@wraps(np.argmin)
- def argmin(self, axis=None, split_every=None):
+ def argmin(self, axis=None, split_every=None, out=None):
from .reductions import argmin
- return argmin(self, axis=axis, split_every=split_every)
+ return argmin(self, axis=axis, split_every=split_every, out=out)
@wraps(np.argmax)
- def argmax(self, axis=None, split_every=None):
+ def argmax(self, axis=None, split_every=None, out=None):
from .reductions import argmax
- return argmax(self, axis=axis, split_every=split_every)
+ return argmax(self, axis=axis, split_every=split_every, out=out)
@wraps(np.sum)
- def sum(self, axis=None, dtype=None, keepdims=False, split_every=None):
+ def sum(self, axis=None, dtype=None, keepdims=False, split_every=None,
+ out=None):
from .reductions import sum
return sum(self, axis=axis, dtype=dtype, keepdims=keepdims,
- split_every=split_every)
+ split_every=split_every, out=out)
@wraps(np.prod)
- def prod(self, axis=None, dtype=None, keepdims=False, split_every=None):
+ def prod(self, axis=None, dtype=None, keepdims=False, split_every=None,
+ out=None):
from .reductions import prod
return prod(self, axis=axis, dtype=dtype, keepdims=keepdims,
- split_every=split_every)
+ split_every=split_every, out=out)
@wraps(np.mean)
- def mean(self, axis=None, dtype=None, keepdims=False, split_every=None):
+ def mean(self, axis=None, dtype=None, keepdims=False, split_every=None,
+ out=None):
from .reductions import mean
return mean(self, axis=axis, dtype=dtype, keepdims=keepdims,
- split_every=split_every)
+ split_every=split_every, out=out)
@wraps(np.std)
- def std(self, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None):
+ def std(self, axis=None, dtype=None, keepdims=False, ddof=0,
+ split_every=None, out=None):
from .reductions import std
return std(self, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof,
- split_every=split_every)
+ split_every=split_every, out=out)
@wraps(np.var)
- def var(self, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None):
+ def var(self, axis=None, dtype=None, keepdims=False, ddof=0,
+ split_every=None, out=None):
from .reductions import var
return var(self, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof,
- split_every=split_every)
+ split_every=split_every, out=out)
def moment(self, order, axis=None, dtype=None, keepdims=False, ddof=0,
- split_every=None):
+ split_every=None, out=None):
"""Calculate the nth centralized moment.
Parameters
@@ -1577,13 +1584,14 @@ class Array(Base):
from .reductions import moment
return moment(self, order, axis=axis, dtype=dtype, keepdims=keepdims,
- ddof=ddof, split_every=split_every)
+ ddof=ddof, split_every=split_every, out=out)
- def vnorm(self, ord=None, axis=None, keepdims=False, split_every=None):
+ def vnorm(self, ord=None, axis=None, keepdims=False, split_every=None,
+ out=None):
""" Vector norm """
from .reductions import vnorm
return vnorm(self, ord=ord, axis=axis, keepdims=keepdims,
- split_every=split_every)
+ split_every=split_every, out=out)
@wraps(map_blocks)
def map_blocks(self, func, *args, **kwargs):
@@ -1646,15 +1654,15 @@ class Array(Base):
from .ghost import map_overlap
return map_overlap(self, func, depth, boundary, trim, **kwargs)
- def cumsum(self, axis, dtype=None):
+ def cumsum(self, axis, dtype=None, out=None):
""" See da.cumsum for docstring """
from .reductions import cumsum
- return cumsum(self, axis, dtype)
+ return cumsum(self, axis, dtype, out=out)
- def cumprod(self, axis, dtype=None):
+ def cumprod(self, axis, dtype=None, out=None):
""" See da.cumprod for docstring """
from .reductions import cumprod
- return cumprod(self, axis, dtype)
+ return cumprod(self, axis, dtype, out=out)
@wraps(squeeze)
def squeeze(self):
@@ -1752,8 +1760,9 @@ class Array(Base):
dask.array.from_delayed
"""
from ..delayed import Delayed
- return np.array(ndeepmap(self.ndim, lambda k: Delayed(k, self.dask), self._keys()),
- dtype=object)
+ dsk = self._optimize(self.dask, self._keys())
+ L = ndeepmap(self.ndim, lambda k: Delayed(k, dsk), self._keys())
+ return np.array(L, dtype=object)
@wraps(np.repeat)
def repeat(self, repeats, axis=None):
@@ -1786,7 +1795,7 @@ def normalize_chunks(chunks, shape=None):
((10, 10, 10), (5,))
>>> normalize_chunks((), shape=(0, 0)) # respects null dimensions
- ((), ())
+ ((0,), (0,))
"""
if chunks is None:
raise ValueError(chunks_none_error_message)
@@ -1796,7 +1805,7 @@ def normalize_chunks(chunks, shape=None):
if isinstance(chunks, Number):
chunks = (chunks,) * len(shape)
if not chunks and shape and all(s == 0 for s in shape):
- chunks = ((),) * len(shape)
+ chunks = ((0,),) * len(shape)
if shape and len(chunks) != len(shape):
if not (len(shape) == 1 and sum(chunks) == shape[0]):
@@ -1811,11 +1820,16 @@ def normalize_chunks(chunks, shape=None):
chunks = sum((blockdims_from_blockshape((s,), (c,))
if not isinstance(c, (tuple, list)) else (c,)
for s, c in zip(shape, chunks)), ())
+ for c in chunks:
+ if not c:
+ raise ValueError("Empty tuples are not allowed in chunks. Express "
+ "zero length dimensions with 0(s) in chunks")
- return tuple(map(tuple, chunks))
+ return tuple(tuple(int(x) if not math.isnan(x) else x for x in c) for c in chunks)
-def from_array(x, chunks, name=None, lock=False, fancy=True, getitem=None):
+def from_array(x, chunks, name=None, lock=False, asarray=True, fancy=True,
+ getitem=None):
""" Create dask array from something that looks like an array
Input must have a ``.shape`` and support numpy-style slicing.
@@ -1835,6 +1849,9 @@ def from_array(x, chunks, name=None, lock=False, fancy=True, getitem=None):
lock : bool or Lock, optional
If ``x`` doesn't support concurrent reads then provide a lock here, or
pass in True to have dask.array create one for you.
+ asarray : bool, optional
+ If True (default), then chunks will be converted to instances of
+        ``ndarray``. Set to False to pass chunks through unchanged.
fancy : bool, optional
If ``x`` doesn't support fancy indexing (e.g. indexing with lists or
arrays) then set to False. Default is True.
@@ -1871,12 +1888,12 @@ def from_array(x, chunks, name=None, lock=False, fancy=True, getitem=None):
lock = SerializableLock()
if getitem is None:
- if fancy:
- getitem = getarray
- else:
- getitem = getarray_nofancy
- dsk = getem(original_name, chunks, getitem, out_name=name, lock=lock)
+ getitem = getter if fancy else getter_nofancy
+
+ dsk = getem(original_name, chunks, getitem=getitem, shape=x.shape,
+ out_name=name, lock=lock, asarray=asarray)
dsk[original_name] = x
+
return Array(dsk, name, chunks, dtype=x.dtype)
@@ -2564,19 +2581,32 @@ def asarray(array):
--------
>>> x = np.arange(3)
>>> asarray(x)
- dask.array<asarray, shape=(3,), dtype=int64, chunksize=(3,)>
+ dask.array<array, shape=(3,), dtype=int64, chunksize=(3,)>
"""
if isinstance(array, Array):
return array
-
- name = 'asarray-' + tokenize(array)
if not isinstance(getattr(array, 'shape', None), Iterable):
array = np.asarray(array)
- dsk = {(name,) + (0,) * len(array.shape):
- (getarray_inline, name) + ((slice(None, None),) * len(array.shape),),
- name: array}
- chunks = tuple((d,) for d in array.shape)
- return Array(dsk, name, chunks, dtype=array.dtype)
+ return from_array(array, chunks=array.shape, getitem=getter_inline)
+
+
+def asanyarray(array):
+ """Coerce argument into a dask array.
+
+ Subclasses of `np.ndarray` will be passed through as chunks unchanged.
+
+ Examples
+ --------
+ >>> x = np.arange(3)
+ >>> asanyarray(x)
+ dask.array<array, shape=(3,), dtype=int64, chunksize=(3,)>
+ """
+ if isinstance(array, Array):
+ return array
+ if not isinstance(getattr(array, 'shape', None), Iterable):
+ array = np.asanyarray(array)
+ return from_array(array, chunks=array.shape, getitem=getter_inline,
+ asarray=False)
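As a rough sketch of the difference from ``asarray`` (single-chunk case; exact types depend on how the result is finalized), ``asanyarray`` leaves ``ndarray`` subclasses such as masked arrays intact:
>>> import numpy as np
>>> import dask.array as da
>>> m = np.ma.masked_array([1, 2, 3], mask=[False, True, False])
>>> type(da.asanyarray(m).compute())  # doctest: +SKIP
<class 'numpy.ma.core.MaskedArray'>
>>> type(da.asarray(m).compute())     # doctest: +SKIP
<class 'numpy.ndarray'>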
def partial_by_order(*args, **kwargs):
@@ -2663,6 +2693,7 @@ def elemwise(op, *args, **kwargs):
--------
atop
"""
+ out = kwargs.pop('out', None)
if not set(['name', 'dtype']).issuperset(kwargs):
msg = "%s does not take the following keyword arguments %s"
raise TypeError(msg % (op.__name__, str(sorted(set(kwargs) - set(['name', 'dtype'])))))
@@ -2695,14 +2726,48 @@ def elemwise(op, *args, **kwargs):
tokenize(op, dt, *args))
if other:
- return atop(partial_by_order, expr_inds,
- *concat((a, tuple(range(a.ndim)[::-1])) for a in arrays),
- dtype=dt, name=name, function=op, other=other,
- token=funcname(op))
+ result = atop(partial_by_order, expr_inds,
+ *concat((a, tuple(range(a.ndim)[::-1])) for a in arrays),
+ dtype=dt, name=name, function=op, other=other,
+ token=funcname(op))
+ else:
+ result = atop(op, expr_inds,
+ *concat((a, tuple(range(a.ndim)[::-1])) for a in arrays),
+ dtype=dt, name=name)
+
+ return handle_out(out, result)
+
+
+def handle_out(out, result):
+    """ Handle out parameters
+
+    If out is a dask.array then this overwrites the contents of that array
+    with the result.
+ """
+ if isinstance(out, tuple):
+ if len(out) == 1:
+ out = out[0]
+ elif len(out) > 1:
+ raise NotImplementedError("The out parameter is not fully supported")
+ else:
+ out = None
+ if isinstance(out, Array):
+ if out.shape != result.shape:
+ raise NotImplementedError(
+ "Mismatched shapes between result and out parameter. "
+ "out=%s, result=%s" % (str(out.shape), str(result.shape)))
+ out._chunks = result.chunks
+ if hasattr(out, '_cached_keys'):
+ del out._cached_keys
+ out.dask = result.dask
+ out.dtype = result.dtype
+ out.name = result.name
+ elif out is not None:
+ msg = ("The out parameter is not fully supported."
+ " Received type %s, expected Dask Array" % type(out).__name__)
+ raise NotImplementedError(msg)
else:
- return atop(op, expr_inds,
- *concat((a, tuple(range(a.ndim)[::-1])) for a in arrays),
- dtype=dt, name=name)
+ return result
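For illustration, ``handle_out`` lets reductions and elemwise operations accept a preallocated dask array via the NumPy-style ``out=`` keyword; the target array is rebound in place to the new graph rather than written element by element (a sketch):
>>> import dask.array as da
>>> x = da.ones((4, 4), chunks=2)
>>> out = da.zeros(4, chunks=2)
>>> x.sum(axis=0, out=out)            # mutates ``out`` to point at the sum graph
>>> out.compute()                     # doctest: +SKIP
array([ 4.,  4.,  4.,  4.])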
@wraps(np.around)
@@ -2767,7 +2832,23 @@ See the following documentation page for details:
def where(condition, x=None, y=None):
if x is None or y is None:
raise TypeError(where_error_message)
- return choose(condition, [y, x])
+
+ x = asarray(x)
+ y = asarray(y)
+
+ shape = broadcast_shapes(x.shape, y.shape)
+ dtype = np.promote_types(x.dtype, y.dtype)
+
+ x = broadcast_to(x, shape).astype(dtype)
+ y = broadcast_to(y, shape).astype(dtype)
+
+ if isinstance(condition, (bool, np.bool8)):
+ if condition:
+ return x
+ else:
+ return y
+ else:
+ return choose(condition, [y, x])
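For illustration, ``where`` now broadcasts ``x`` and ``y`` to a common shape and promoted dtype before choosing, and returns one side directly when ``condition`` is a plain boolean (a sketch):
>>> import dask.array as da
>>> x = da.arange(4, chunks=2)
>>> da.where(x > 1, x, -1).compute()
array([-1, -1,  2,  3])
>>> da.where(True, x, -1).compute()   # boolean condition short-circuits to x
array([0, 1, 2, 3])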
... 11869 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/dask.git