[Python-modules-commits] [dask] 01/05: New upstream version 0.15.4
Diane Trout
diane@moszumanska.debian.org
Fri Oct 13 22:39:44 UTC 2017
This is an automated email from the git hooks/post-receive script.
diane pushed a commit to branch master
in repository dask.
commit facf75c988c49e0ad9d7e174163d0865b0ddc9e4
Author: Diane Trout <diane@ghic.org>
Date: Fri Oct 13 08:19:59 2017 -0700
New upstream version 0.15.4
---
.github/PULL_REQUEST_TEMPLATE.md | 4 +
.travis.yml | 79 +-
LICENSE.txt | 8 +-
continuous_integration/travis/install.sh | 15 +-
dask/array/__init__.py | 24 +-
dask/array/chunk.py | 4 +
dask/array/core.py | 1525 +++++++-------------------
dask/array/creation.py | 463 +++++++-
dask/array/ghost.py | 7 +-
dask/array/learn.py | 9 +-
dask/array/linalg.py | 70 +-
dask/array/ma.py | 251 +++++
dask/array/numpy_compat.py | 5 +
dask/array/optimization.py | 2 -
dask/array/percentile.py | 4 +-
dask/array/rechunk.py | 6 +-
dask/array/reductions.py | 83 +-
dask/array/routines.py | 866 +++++++++++++++
dask/array/slicing.py | 201 +++-
dask/array/tests/test_array_core.py | 870 +++------------
dask/array/tests/test_creation.py | 198 +++-
dask/array/tests/test_fft.py | 11 +-
dask/array/tests/test_ghost.py | 21 +-
dask/array/tests/test_linalg.py | 101 +-
dask/array/tests/test_masked.py | 315 ++++++
dask/array/tests/test_percentiles.py | 12 +-
dask/array/tests/test_rechunk.py | 29 +-
dask/array/tests/test_reductions.py | 27 +-
dask/array/tests/test_routines.py | 899 +++++++++++++++
dask/array/tests/test_slicing.py | 115 +-
dask/array/tests/test_testing.py | 19 +
dask/array/ufunc.py | 1 -
dask/array/utils.py | 17 +-
dask/bag/core.py | 81 +-
dask/bag/tests/test_bag.py | 96 +-
dask/base.py | 9 +-
dask/bytes/core.py | 5 +-
dask/bytes/tests/test_s3.py | 2 +-
dask/compatibility.py | 3 -
dask/core.py | 11 +-
dask/dataframe/__init__.py | 3 +-
dask/dataframe/accessor.py | 35 +-
dask/dataframe/categorical.py | 1 -
dask/dataframe/core.py | 155 ++-
dask/dataframe/groupby.py | 207 +++-
dask/dataframe/indexing.py | 32 +-
dask/dataframe/io/csv.py | 75 +-
dask/dataframe/io/demo.py | 2 +
dask/dataframe/io/hdf.py | 41 +-
dask/dataframe/io/io.py | 21 +-
dask/dataframe/io/parquet.py | 69 +-
dask/dataframe/io/sql.py | 4 +-
dask/dataframe/io/tests/test_csv.py | 62 +-
dask/dataframe/io/tests/test_hdf.py | 24 +
dask/dataframe/io/tests/test_io.py | 29 +-
dask/dataframe/io/tests/test_parquet.py | 80 ++
dask/dataframe/methods.py | 27 +-
dask/dataframe/partitionquantiles.py | 7 +-
dask/dataframe/shuffle.py | 6 +-
dask/dataframe/tests/test_categorical.py | 37 +-
dask/dataframe/tests/test_dataframe.py | 130 ++-
dask/dataframe/tests/test_format.py | 113 +-
dask/dataframe/tests/test_groupby.py | 196 +++-
dask/dataframe/tests/test_indexing.py | 6 +
dask/dataframe/tests/test_rolling.py | 5 +-
dask/dataframe/tests/test_shuffle.py | 39 +-
dask/dataframe/tests/test_utils_dataframe.py | 71 +-
dask/dataframe/tseries/resample.py | 2 +-
dask/dataframe/utils.py | 77 +-
dask/delayed.py | 18 +-
dask/diagnostics/profile.py | 8 +-
dask/diagnostics/profile_visualize.py | 6 +-
dask/diagnostics/tests/test_profiler.py | 40 +-
dask/optimize.py | 11 +-
dask/rewrite.py | 2 +-
dask/tests/test_base.py | 2 +-
dask/tests/test_core.py | 4 +-
dask/tests/test_distributed.py | 25 +-
dask/tests/test_utils.py | 41 +-
dask/utils.py | 91 +-
docs/source/array-api.rst | 71 ++
docs/source/array-sparse.rst | 11 +-
docs/source/changelog.rst | 120 +-
docs/source/conf.py | 6 +-
docs/source/custom-graphs.rst | 2 +-
docs/source/daskcheatsheet.pdf | Bin 262284 -> 203552 bytes
docs/source/dataframe-api.rst | 117 +-
docs/source/dataframe-groupby.rst | 67 +-
docs/source/dataframe-overview.rst | 4 +-
docs/source/develop.rst | 9 +-
docs/source/funding.rst | 4 +-
docs/source/index.rst | 13 +-
docs/source/install.rst | 4 +-
docs/source/support.rst | 15 +-
setup.cfg | 3 +
setup.py | 10 +-
96 files changed, 6150 insertions(+), 2578 deletions(-)
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..d48dfb2
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,4 @@
+- [ ] Tests added / passed
+- [ ] Passes `flake8 dask`
+- [ ] Fully documented, including `docs/source/changelog.rst` for all changes
+ and one of the `docs/source/*-api.rst` files for new API
diff --git a/.travis.yml b/.travis.yml
index ea6c0bc..fa7761c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,32 +1,79 @@
language: generic
sudo: false
dist: trusty
+os: linux
-env:
- matrix:
- - PYTHON=2.7 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='true' PARALLEL='false' TEST_IMPORTS='false' XTRATESTARGS=
- - PYTHON=2.7 NUMPY=1.13.0 PANDAS=0.20.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
- - PYTHON=3.4 NUMPY=1.10.4 PANDAS=0.19.1 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
- - PYTHON=3.5 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' XTRATESTARGS=
- - PYTHON=3.6 NUMPY=1.13.0 PANDAS=0.20.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
+_base_envs:
+ - &coverage COVERAGE='true' PARALLEL='false'
+ - &no_coverage COVERAGE='false' PARALLEL='true'
+ - &optimize PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
+ - &no_optimize XTRATESTARGS=
+ - &imports TEST_IMPORTS='true'
+ - &no_imports TEST_IMPORTS='false'
-matrix:
+jobs:
fast_finish: true
include:
- - os: osx
- env: PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
- # Together with fast_finish, allow build to be marked successful before the OS X job finishes
+ - env:
+ - PYTHON=2.7
+ - NUMPY=1.12.1
+ - PANDAS=0.19.2
+ - *coverage
+ - *no_optimize
+ - *no_imports
+
+ - env:
+ - PYTHON=2.7
+ - NUMPY=1.13.0
+ - PANDAS=0.20.2
+ - *no_coverage
+ - *optimize
+ - *no_imports
+
+ - env:
+ - PYTHON=3.4
+ - NUMPY=1.10.4
+ - PANDAS=0.19.1
+ - *no_coverage
+ - *optimize
+ - *no_imports
+ if: type != pull_request
+
+ - env:
+ - PYTHON=3.5
+ - NUMPY=1.12.1
+ - PANDAS=0.19.2
+ - *no_coverage
+ - *no_optimize
+ - *no_imports
+
+ - env: &py36_env
+ - PYTHON=3.6
+ - NUMPY=1.13.0
+ - PANDAS=0.20.2
+ - *no_coverage
+ - *no_optimize
+ - *imports
+
+ - env: *py36_env
+ if: type != pull_request
+ os: osx
+
allow_failures:
- - os: osx
- # This needs to be the exact same line as above
- env: PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
+ - os: osx
+
+
+install:
+ - source continuous_integration/travis/install.sh
-install: source continuous_integration/travis/install.sh
script:
- source continuous_integration/travis/run_tests.sh
- flake8 dask
+ - pycodestyle --select=E722 dask # check for bare `except:` blocks
- if [[ $TEST_IMPORTS == 'true' ]]; then source continuous_integration/travis/test_imports.sh; fi
-after_success: source continuous_integration/travis/after_success.sh
+
+after_success:
+ - source continuous_integration/travis/after_success.sh
notifications:
email: false
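
The rewritten .travis.yml above relies on YAML anchors (&name) and aliases
(*name) so the jobs share environment fragments instead of repeating one
long env line per job. A minimal sketch of how aliases expand, assuming
PyYAML is installed; names here are illustrative:

    import yaml

    snippet = """
    _base_envs:
      - &coverage COVERAGE='true' PARALLEL='false'
      - &no_coverage COVERAGE='false' PARALLEL='true'
    jobs:
      include:
        - env:
            - PYTHON=2.7
            - *coverage
    """

    doc = yaml.safe_load(snippet)
    # Each *alias expands to the value of its &anchor:
    print(doc['jobs']['include'][0]['env'])
    # ['PYTHON=2.7', "COVERAGE='true' PARALLEL='false'"]
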
diff --git a/LICENSE.txt b/LICENSE.txt
index 0c01700..5f68378 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (c) 2014-2015, Continuum Analytics, Inc. and contributors
+Copyright (c) 2014-2017, Anaconda, Inc. and contributors
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@@ -11,9 +11,9 @@ Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
-Neither the name of Continuum Analytics nor the names of any contributors
-may be used to endorse or promote products derived from this software
-without specific prior written permission.
+Neither the name of Anaconda nor the names of any contributors may be used to
+endorse or promote products derived from this software without specific prior
+written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
diff --git a/continuous_integration/travis/install.sh b/continuous_integration/travis/install.sh
index d3bb215..21024ae 100644
--- a/continuous_integration/travis/install.sh
+++ b/continuous_integration/travis/install.sh
@@ -1,10 +1,10 @@
# Install conda
case "$(uname -s)" in
'Darwin')
- MINICONDA_FILENAME="Miniconda3-latest-MacOSX-x86_64.sh"
+ MINICONDA_FILENAME="Miniconda3-4.3.21-MacOSX-x86_64.sh"
;;
'Linux')
- MINICONDA_FILENAME="Miniconda3-latest-Linux-x86_64.sh"
+ MINICONDA_FILENAME="Miniconda3-4.3.21-Linux-x86_64.sh"
;;
*) ;;
esac
@@ -43,7 +43,6 @@ conda install -q -c conda-forge \
ipython \
partd \
psutil \
- pytables \
"pytest<=3.1.1" \
scikit-image \
scikit-learn \
@@ -51,6 +50,9 @@ conda install -q -c conda-forge \
sqlalchemy \
toolz
+# install pytables from defaults for now
+conda install -q pytables
+
pip install -q git+https://github.com/dask/partd --upgrade --no-deps
pip install -q git+https://github.com/dask/zict --upgrade --no-deps
pip install -q git+https://github.com/dask/distributed --upgrade --no-deps
@@ -79,9 +81,10 @@ pip install -q \
mmh3 \
pandas_datareader \
pytest-xdist \
- xxhash
+ xxhash \
+ pycodestyle
# Install dask
pip install -q --no-deps -e .[complete]
-echo pip freeze
-pip freeze
+echo conda list
+conda list
diff --git a/dask/array/__init__.py b/dask/array/__init__.py
index 483dbd3..9155423 100644
--- a/dask/array/__init__.py
+++ b/dask/array/__init__.py
@@ -1,14 +1,16 @@
from __future__ import absolute_import, division, print_function
from ..utils import ignoring
-from .core import (Array, stack, concatenate, take, tensordot, transpose,
- from_array, choose, where, coarsen, insert, broadcast_to, ravel,
- roll, fromfunction, unique, store, squeeze, topk, bincount, tile,
- digitize, histogram, map_blocks, atop, to_hdf5, dot, cov, array,
- dstack, vstack, hstack, to_npy_stack, from_npy_stack, compress,
- from_delayed, round, swapaxes, repeat, asarray, asanyarray)
-from .core import (around, isnull, notnull, isclose, eye, triu,
- tril, diag, corrcoef)
+from .core import (Array, concatenate, stack, from_array, store, map_blocks,
+ atop, to_hdf5, to_npy_stack, from_npy_stack, from_delayed,
+ asarray, asanyarray, broadcast_to)
+from .routines import (take, choose, argwhere, where, coarsen, insert,
+ ravel, roll, unique, squeeze, topk, ptp, diff, ediff1d,
+ bincount, digitize, histogram, cov, array, dstack,
+ vstack, hstack, compress, extract, round, count_nonzero,
+ flatnonzero, nonzero, around, isnull, notnull, isclose,
+ corrcoef, swapaxes, tensordot, transpose, dot,
+ apply_along_axis, apply_over_axes, result_type)
from .reshape import reshape
from .ufunc import (add, subtract, multiply, divide, logaddexp, logaddexp2,
true_divide, floor_divide, negative, power, remainder, mod, conj, exp,
@@ -30,10 +32,14 @@ from .reductions import (sum, prod, mean, std, var, any, all, min, max, vnorm,
from .percentile import percentile
with ignoring(ImportError):
from .reductions import nanprod, nancumprod, nancumsum
+with ignoring(ImportError):
+ from . import ma
from . import random, linalg, ghost, learn, fft
from .wrap import ones, zeros, empty, full
+from .creation import ones_like, zeros_like, empty_like, full_like
from .rechunk import rechunk
from ..context import set_options
from ..base import compute
from .optimization import optimize
-from .creation import arange, linspace, indices
+from .creation import (arange, linspace, indices, diag, eye, triu, tril,
+ fromfunction, tile, repeat)
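
The optional `from . import ma` above is guarded by dask's `ignoring`
context manager, so the new masked-array module simply drops out when its
requirements are missing. `ignoring` behaves essentially like
`contextlib.suppress`; a sketch of the pattern:

    from contextlib import contextmanager

    @contextmanager
    def ignoring(*exceptions):
        try:
            yield
        except exceptions:
            pass

    # Optional submodules are skipped when an import fails:
    with ignoring(ImportError):
        import numpy.ma  # stand-in for an optional dependency
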
diff --git a/dask/array/chunk.py b/dask/array/chunk.py
index f5e2d73..7bc6c72 100644
--- a/dask/array/chunk.py
+++ b/dask/array/chunk.py
@@ -191,3 +191,7 @@ def topk(k, x):
def arange(start, stop, step, length, dtype):
res = np.arange(start, stop, step, dtype)
return res[:-1] if len(res) > length else res
+
+
+def astype(x, astype_dtype=None, **kwargs):
+ return x.astype(astype_dtype, **kwargs)
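
The new module-level `chunk.astype` is what `Array.astype` now calls
blockwise (see the core.py hunk further down that switches `map_blocks`
to it). A hedged usage sketch:

    import numpy as np
    import dask.array as da

    x = da.from_array(np.arange(10), chunks=5)
    y = x.astype(np.float64)   # each 5-element chunk is cast independently
    assert y.compute().dtype == np.float64
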
diff --git a/dask/array/core.py b/dask/array/core.py
index d7d5ac3..de1df97 100644
--- a/dask/array/core.py
+++ b/dask/array/core.py
@@ -4,10 +4,9 @@ from bisect import bisect
from collections import Iterable, Mapping
from collections import Iterator
from functools import partial, wraps
-import inspect
from itertools import product
import math
-from numbers import Integral, Number
+from numbers import Number
import operator
from operator import add, getitem, mul
import os
@@ -19,25 +18,24 @@ import uuid
import warnings
try:
- from cytoolz.curried import (partition, concat, pluck, join, first,
- groupby, valmap, accumulate, interleave,
- sliding_window, assoc)
+ from cytoolz import (partition, concat, join, first,
+ groupby, valmap, accumulate, assoc)
+ from cytoolz.curried import filter, pluck
except ImportError:
- from toolz.curried import (partition, concat, pluck, join, first,
- groupby, valmap, accumulate,
- interleave, sliding_window, assoc)
+ from toolz import (partition, concat, join, first,
+ groupby, valmap, accumulate, assoc)
+ from toolz.curried import filter, pluck
from toolz import pipe, map, reduce
import numpy as np
from . import chunk
-from .slicing import slice_array
-from . import numpy_compat
+from .slicing import slice_array, replace_ellipsis
from ..base import Base, tokenize, normalize_token
from ..context import _globals
from ..utils import (homogeneous_deepmap, ndeepmap, ignoring, concrete,
is_integer, IndexCallable, funcname, derived_from,
- SerializableLock, ensure_dict, package_of)
+ SerializableLock, ensure_dict, Dispatch)
from ..compatibility import unicode, long, getargspec, zip_longest, apply
from ..delayed import to_task_dask
from .. import threaded, core
@@ -45,6 +43,20 @@ from .. import sharedict
from ..sharedict import ShareDict
+concatenate_lookup = Dispatch('concatenate')
+tensordot_lookup = Dispatch('tensordot')
+concatenate_lookup.register((object, np.ndarray), np.concatenate)
+tensordot_lookup.register((object, np.ndarray), np.tensordot)
+
+
+@tensordot_lookup.register_lazy('sparse')
+@concatenate_lookup.register_lazy('sparse')
+def register_sparse():
+ import sparse
+ concatenate_lookup.register(sparse.COO, sparse.concatenate)
+ tensordot_lookup.register(sparse.COO, sparse.tensordot)
+
+
def getter(a, b, asarray=True, lock=None):
if isinstance(b, tuple) and any(x is None for x in b):
b2 = tuple(x for x in b if x is not None)
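
`Dispatch` here is dask's small per-type function registry: `register`
maps concrete types to implementations, and `register_lazy` defers
registration until the named module (here `sparse`) is first needed, so
it stays an optional dependency. A simplified sketch of the mechanism
(not dask's exact implementation):

    class Dispatch(object):
        """Simplified per-type function registry (cf. dask.utils.Dispatch)."""
        def __init__(self):
            self._lookup = {}
            self._lazy = {}

        def register(self, typ, func):
            # Accept a single type or a tuple of types
            for t in typ if isinstance(typ, tuple) else (typ,):
                self._lookup[t] = func

        def register_lazy(self, module_name, func=None):
            # Decorator form: registration runs on first dispatch of a
            # type defined in `module_name`
            if func is None:
                return lambda f: self.register_lazy(module_name, f)
            self._lazy[module_name] = func
            return func

        def dispatch(self, typ):
            for cls in typ.__mro__:
                module = cls.__module__.split('.')[0]
                if module in self._lazy:
                    self._lazy.pop(module)()  # run deferred registration
                if cls in self._lookup:
                    return self._lookup[cls]
            raise TypeError("No dispatch for %s" % typ)
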
@@ -269,8 +281,9 @@ def broadcast_dimensions(argpairs, numblocks, sentinels=(1, (1,)),
{'i': 'Hello', 'j': (2, 3)}
"""
# List like [('i', 2), ('j', 1), ('i', 1), ('j', 2)]
+ argpairs2 = [(a, ind) for a, ind in argpairs if ind is not None]
L = concat([zip(inds, dims) for (x, inds), (x, dims)
- in join(first, argpairs, first, numblocks.items())])
+ in join(first, argpairs2, first, numblocks.items())])
g = groupby(0, L)
g = dict((k, set([d for i, d in v])) for k, v in g.items())
@@ -380,6 +393,13 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
{('z', 0): (apply, f, [('x', 0)], {'b': 10}),
('z', 1): (apply, f, [('x', 1)], {'b': 10})}
+ Include literals by indexing with ``None``
+
+ >>> top(add, 'z', 'i', 'x', 'i', 100, None, numblocks={'x': (2,)}) # doctest: +SKIP
+ {('z', 0): (add, ('x', 0), 100),
+ ('z', 1): (add, ('x', 1), 100)}
+
+
See Also
--------
atop
@@ -389,9 +409,9 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
new_axes = kwargs.pop('new_axes', {})
argpairs = list(partition(2, arrind_pairs))
- assert set(numblocks) == set(pluck(0, argpairs))
+ assert set(numblocks) == {name for name, ind in argpairs if ind is not None}
- all_indices = pipe(argpairs, pluck(1), concat, set)
+ all_indices = pipe(argpairs, pluck(1), filter(None), concat, set)
dummy_indices = all_indices - set(out_indices)
# Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
@@ -407,20 +427,33 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
# {j: [1, 2, 3], ...} For j a dummy index of dimension 3
dummies = dict((i, list(range(dims[i]))) for i in dummy_indices)
+ dsk = {}
+
+ # Unpack dask values in non-array arguments
+ for i, (arg, ind) in enumerate(argpairs):
+ if ind is None:
+ arg2, dsk2 = to_task_dask(arg)
+ if dsk2:
+ dsk.update(ensure_dict(dsk2))
+ argpairs[i] = (arg2, ind)
+
# Create argument lists
valtups = []
for kd in keydicts:
args = []
for arg, ind in argpairs:
- tups = lol_tuples((arg,), ind, kd, dummies)
- if any(nb == 1 for nb in numblocks[arg]):
- tups2 = zero_broadcast_dimensions(tups, numblocks[arg])
+ if ind is None:
+ args.append(arg)
else:
- tups2 = tups
- if concatenate and isinstance(tups2, list):
- axes = [n for n, i in enumerate(ind) if i in dummies]
- tups2 = (concatenate_axes, tups2, axes)
- args.append(tups2)
+ tups = lol_tuples((arg,), ind, kd, dummies)
+ if any(nb == 1 for nb in numblocks[arg]):
+ tups2 = zero_broadcast_dimensions(tups, numblocks[arg])
+ else:
+ tups2 = tups
+ if concatenate and isinstance(tups2, list):
+ axes = [n for n, i in enumerate(ind) if i in dummies]
+ tups2 = (concatenate_axes, tups2, axes)
+ args.append(tups2)
valtups.append(args)
if not kwargs: # will not be used in an apply, should be a tuple
@@ -429,7 +462,6 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
# Add heads to tuples
keys = [(output,) + kt for kt in keytups]
- dsk = {}
# Unpack delayed objects in kwargs
if kwargs:
task, dsk2 = to_task_dask(kwargs)
@@ -481,8 +513,8 @@ def _concatenate2(arrays, axes=[]):
return arrays
if len(axes) > 1:
arrays = [_concatenate2(a, axes=axes[1:]) for a in arrays]
- module = package_of(type(max(arrays, key=lambda x: x.__array_priority__))) or np
- return module.concatenate(arrays, axis=axes[0])
+ concatenate = concatenate_lookup.dispatch(type(max(arrays, key=lambda x: x.__array_priority__)))
+ return concatenate(arrays, axis=axes[0])
def apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype=True):
@@ -637,9 +669,11 @@ def map_blocks(func, *args, **kwargs):
new_axis = [new_axis]
arrs = [a for a in args if isinstance(a, Array)]
- other = [(i, a) for i, a in enumerate(args) if not isinstance(a, Array)]
- argpairs = [(a.name, tuple(range(a.ndim))[::-1]) for a in arrs]
+ argpairs = [(a.name, tuple(range(a.ndim))[::-1])
+ if isinstance(a, Array)
+ else (a, None)
+ for a in args]
numblocks = {a.name: a.numblocks for a in arrs}
arginds = list(concat(argpairs))
out_ind = tuple(range(max(a.ndim for a in arrs)))[::-1]
@@ -648,19 +682,14 @@ def map_blocks(func, *args, **kwargs):
spec = getargspec(func)
block_id = ('block_id' in spec.args or
'block_id' in getattr(spec, 'kwonly_args', ()))
- except:
+ except Exception:
block_id = False
if block_id:
kwargs['block_id'] = '__dummy__'
- if other:
- dsk = top(partial_by_order, name, out_ind, *arginds,
- numblocks=numblocks, function=func, other=other,
- **kwargs)
- else:
- dsk = top(func, name, out_ind, *arginds, numblocks=numblocks,
- **kwargs)
+ dsk = top(func, name, out_ind, *arginds, numblocks=numblocks,
+ **kwargs)
# If func has block_id as an argument, add it to the kwargs for each call
if block_id:
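
The map_blocks rewrite above is the user-facing side of the new `None`
index support in `top`: non-Array arguments are now paired with a `None`
index and forwarded to every block as literals, replacing the old
`partial_by_order` bookkeeping. A hedged example:

    import numpy as np
    import dask.array as da

    x = da.from_array(np.arange(6), chunks=3)
    # The literal 10 reaches each block unchanged:
    y = da.map_blocks(np.add, x, 10, dtype=x.dtype)
    assert (y.compute() == np.arange(6) + 10).all()
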
@@ -725,7 +754,7 @@ def map_blocks(func, *args, **kwargs):
else:
try:
chunks2 = list(broadcast_chunks(*[a.chunks for a in arrs]))
- except:
+ except Exception:
raise ValueError("Arrays in `map_blocks` don't align, can't "
"infer output chunks. Please provide "
"`chunks` kwarg.")
@@ -783,56 +812,6 @@ def broadcast_chunks(*chunkss):
return tuple(result)
-@wraps(np.squeeze)
-def squeeze(a, axis=None):
- if 1 not in a.shape:
- return a
- if axis is None:
- axis = tuple(i for i, d in enumerate(a.shape) if d == 1)
- b = a.map_blocks(partial(np.squeeze, axis=axis), dtype=a.dtype)
- chunks = tuple(bd for bd in b.chunks if bd != (1,))
-
- name = 'squeeze-' + tokenize(a, axis)
- old_keys = list(product([b.name], *[range(len(bd)) for bd in b.chunks]))
- new_keys = list(product([name], *[range(len(bd)) for bd in chunks]))
-
- dsk = {n: b.dask[o] for o, n in zip(old_keys, new_keys)}
-
- return Array(sharedict.merge(b.dask, (name, dsk)), name, chunks, dtype=a.dtype)
-
-
-def topk(k, x):
- """ The top k elements of an array
-
- Returns the k greatest elements of the array in sorted order. Only works
- on arrays of a single dimension.
-
- This assumes that ``k`` is small. All results will be returned in a single
- chunk.
-
- Examples
- --------
-
- >>> x = np.array([5, 1, 3, 6])
- >>> d = from_array(x, chunks=2)
- >>> d.topk(2).compute()
- array([6, 5])
- """
- if x.ndim != 1:
- raise ValueError("Topk only works on arrays of one dimension")
-
- token = tokenize(k, x)
- name = 'chunk.topk-' + token
- dsk = dict(((name, i), (chunk.topk, k, key))
- for i, key in enumerate(x._keys()))
- name2 = 'topk-' + token
- dsk[(name2, 0)] = (getitem, (np.sort, (np.concatenate, list(dsk))),
- slice(-1, -k - 1, -1))
- chunks = ((k,),)
-
- return Array(sharedict.merge((name2, dsk), x.dask), name2, chunks, dtype=x.dtype)
-
-
def store(sources, targets, lock=True, regions=None, compute=True, **kwargs):
""" Store dask arrays in array-like objects, overwrite data in target
@@ -963,6 +942,15 @@ def finalize(results):
return unpack_singleton(results)
+CHUNKS_NONE_ERROR_MESSAGE = """
+You must specify a chunks= keyword argument.
+This specifies the chunksize of your array blocks.
+
+See the following documentation page for details:
+ http://dask.pydata.org/en/latest/array-creation.html#chunks
+""".strip()
+
+
class Array(Base):
""" Parallel Dask Array
@@ -987,7 +975,7 @@ class Array(Base):
--------
dask.array.from_array
"""
- __slots__ = 'dask', 'name', '_chunks', 'dtype'
+ __slots__ = 'dask', '_name', '_cached_keys', '_chunks', 'dtype'
_optimize = staticmethod(optimize)
_default_get = staticmethod(threaded.get)
@@ -1004,7 +992,7 @@ class Array(Base):
self.name = name
self._chunks = normalize_chunks(chunks, shape)
if self._chunks is None:
- raise ValueError(chunks_none_error_message)
+ raise ValueError(CHUNKS_NONE_ERROR_MESSAGE)
if dtype is None:
raise ValueError("You must specify the dtype of the array")
self.dtype = np.dtype(dtype)
@@ -1103,12 +1091,20 @@ class Array(Base):
""" Length of one array element in bytes """
return self.dtype.itemsize
+ @property
+ def name(self):
+ return self._name
+
+ @name.setter
+ def name(self, val):
+ self._name = val
+ # Clear the key cache when the name is reset
+ self._cached_keys = None
+
def _keys(self, *args):
if not args:
- try:
+ if self._cached_keys is not None:
return self._cached_keys
- except AttributeError:
- pass
if not self.chunks:
return [(self.name,)]
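
Routing `name` through a property is what makes the key cache safe:
assigning a new name must drop keys computed for the old one. The pattern
in miniature, with illustrative names:

    class Named(object):
        def __init__(self, name):
            self._cached_keys = None
            self.name = name      # goes through the setter below

        @property
        def name(self):
            return self._name

        @name.setter
        def name(self, val):
            self._name = val
            self._cached_keys = None  # invalidate keys for the old name
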
@@ -1187,6 +1183,7 @@ class Array(Base):
return complex(self.compute())
def __setitem__(self, key, value):
+ from .routines import where
if isinstance(key, Array):
if isinstance(value, Array) and value.ndim > 1:
raise ValueError('boolean index array should have 1 dimension')
@@ -1200,12 +1197,11 @@ class Array(Base):
% type(key))
def __getitem__(self, index):
- out = 'getitem-' + tokenize(self, index)
-
# Field access, e.g. x['a'] or x[['a', 'b']]
if (isinstance(index, (str, unicode)) or
(isinstance(index, list) and index and
all(isinstance(i, (str, unicode)) for i in index))):
+ out = 'getitem-' + tokenize(self, index)
if isinstance(index, (str, unicode)):
dt = self.dtype[index]
else:
@@ -1219,55 +1215,67 @@ class Array(Base):
else:
return self.map_blocks(getitem, index, dtype=dt, name=out)
- # Slicing
- if isinstance(index, Array):
- return slice_with_dask_array(self, index)
-
if not isinstance(index, tuple):
index = (index,)
- if any(isinstance(i, Array) for i in index):
- raise NotImplementedError("Indexing with a dask Array")
+ from .slicing import normalize_index, slice_with_dask_array
+ index2 = normalize_index(index, self.shape)
+
+ if any(isinstance(i, Array) for i in index2):
+ self, index2 = slice_with_dask_array(self, index2)
- if all(isinstance(i, slice) and i == slice(None) for i in index):
+ if all(isinstance(i, slice) and i == slice(None) for i in index2):
return self
- dsk, chunks = slice_array(out, self.name, self.chunks, index)
+ out = 'getitem-' + tokenize(self, index2)
+ dsk, chunks = slice_array(out, self.name, self.chunks, index2)
dsk2 = sharedict.merge(self.dask, (out, dsk))
return Array(dsk2, out, chunks, dtype=self.dtype)
def _vindex(self, key):
- if (not isinstance(key, tuple) or
- not len([k for k in key if isinstance(k, (np.ndarray, list))]) >= 2 or
- not all(isinstance(k, (np.ndarray, list)) or k == slice(None, None)
- for k in key)):
- msg = ("vindex expects only lists and full slices\n"
- "At least two entries must be a list\n"
- "For other combinations try doing normal slicing first, followed\n"
- "by vindex slicing. Got: \n\t%s")
- raise IndexError(msg % str(key))
- if any((isinstance(k, np.ndarray) and k.ndim != 1) or
- (isinstance(k, list) and k and isinstance(k[0], list))
- for k in key):
- raise IndexError("vindex does not support multi-dimensional keys\n"
- "Got: %s" % str(key))
- if len(set(len(k) for k in key if isinstance(k, (list, np.ndarray)))) != 1:
- raise IndexError("All indexers must have the same length, got\n"
- "\t%s" % str(key))
- key = key + (slice(None, None),) * (self.ndim - len(key))
- key = [i if isinstance(i, list) else
- i.tolist() if isinstance(i, np.ndarray) else
- None for i in key]
+ if not isinstance(key, tuple):
+ key = (key,)
+ if any(k is None for k in key):
+ raise IndexError(
+ "vindex does not support indexing with None (np.newaxis), "
+ "got {}".format(key))
+ if all(isinstance(k, slice) for k in key):
+ raise IndexError(
+ "vindex requires at least one non-slice to vectorize over. "
+ "Use normal slicing instead when only using slices. Got: {}"
+ .format(key))
return _vindex(self, *key)
@property
def vindex(self):
+ """Vectorized indexing with broadcasting.
+
+ This is equivalent to numpy's advanced indexing, using arrays that are
+ broadcast against each other. This allows for pointwise indexing:
+
+ >>> x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+ >>> x = from_array(x, chunks=2)
+ >>> x.vindex[[0, 1, 2], [0, 1, 2]].compute()
+ array([1, 5, 9])
+
+ Mixed basic/advanced indexing with slices/arrays is also supported. The
+ order of dimensions in the result follows those proposed for
+ ndarray.vindex [1]_: the subspace spanned by arrays is followed by all
+ slices.
+
+ Note: ``vindex`` provides more general functionality than standard
+ indexing, but it also has fewer optimizations and can be significantly
+ slower.
+
+ _[1]: https://github.com/numpy/numpy/pull/6256
+ """
return IndexCallable(self._vindex)
- @wraps(np.dot)
+ @derived_from(np.ndarray)
def dot(self, other):
+ from .routines import tensordot
return tensordot(self, other,
axes=((self.ndim - 1,), (other.ndim - 2,)))
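
To make the new vindex docstring concrete: vindex pairs its index arrays
elementwise (pointwise selection), while ordinary outer indexing takes
their cross product. A small hedged example, with NumPy shown for the
cross-product case:

    import numpy as np
    import dask.array as da

    nx = np.arange(9).reshape(3, 3)
    x = da.from_array(nx, chunks=2)
    print(x.vindex[[0, 1, 2], [0, 1, 2]].compute())  # pointwise: [0 4 8]
    print(nx[[0, 1, 2]][:, [0, 1, 2]])               # cross product: 3x3
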
@@ -1277,31 +1285,41 @@ class Array(Base):
@property
def T(self):
- return transpose(self)
+ return self.transpose()
@derived_from(np.ndarray)
def transpose(self, *axes):
+ from .routines import transpose
if not axes:
axes = None
elif len(axes) == 1 and isinstance(axes[0], Iterable):
axes = axes[0]
return transpose(self, axes=axes)
- @wraps(np.ravel)
+ @derived_from(np.ndarray)
def ravel(self):
+ from .routines import ravel
return ravel(self)
flatten = ravel
- @wraps(np.reshape)
+ @derived_from(np.ndarray)
+ def choose(self, choices):
+ from .routines import choose
+ return choose(self, choices)
+
+ @derived_from(np.ndarray)
def reshape(self, *shape):
from .reshape import reshape
if len(shape) == 1 and not isinstance(shape[0], Number):
shape = shape[0]
return reshape(self, shape)
- @wraps(topk)
def topk(self, k):
+ """The top k elements of an array.
+
+ See ``da.topk`` for docstring"""
+ from .routines import topk
return topk(k, self)
def astype(self, dtype, **kwargs):
@@ -1342,7 +1360,7 @@ class Array(Base):
" according to the rule "
"{2!r}".format(self.dtype, dtype, casting))
name = 'astype-' + tokenize(self, dtype, casting, copy)
- return self.map_blocks(_astype, dtype=dtype, name=name,
+ return self.map_blocks(chunk.astype, dtype=dtype, name=name,
astype_dtype=dtype, **kwargs)
def __abs__(self):
@@ -1454,6 +1472,7 @@ class Array(Base):
return elemwise(operator.xor, other, self)
def __matmul__(self, other):
+ from .routines import tensordot
if not hasattr(other, 'ndim'):
other = np.asarray(other) # account for array-like RHS
if other.ndim > 2:
@@ -1466,6 +1485,7 @@ class Array(Base):
(other.ndim - 2,)))
def __rmatmul__(self, other):
+ from .routines import tensordot
if not hasattr(other, 'ndim'):
other = np.asarray(other) # account for array-like on LHS
if self.ndim > 2:
@@ -1477,69 +1497,69 @@ class Array(Base):
return tensordot(other, self, axes=((other.ndim - 1,),
(self.ndim - 2,)))
- @wraps(np.any)
+ @derived_from(np.ndarray)
def any(self, axis=None, keepdims=False, split_every=None, out=None):
from .reductions import any
return any(self, axis=axis, keepdims=keepdims, split_every=split_every,
out=out)
- @wraps(np.all)
+ @derived_from(np.ndarray)
def all(self, axis=None, keepdims=False, split_every=None, out=None):
from .reductions import all
return all(self, axis=axis, keepdims=keepdims, split_every=split_every,
out=out)
- @wraps(np.min)
+ @derived_from(np.ndarray)
def min(self, axis=None, keepdims=False, split_every=None, out=None):
from .reductions import min
return min(self, axis=axis, keepdims=keepdims, split_every=split_every,
out=out)
- @wraps(np.max)
+ @derived_from(np.ndarray)
def max(self, axis=None, keepdims=False, split_every=None, out=None):
from .reductions import max
return max(self, axis=axis, keepdims=keepdims, split_every=split_every,
out=out)
- @wraps(np.argmin)
+ @derived_from(np.ndarray)
def argmin(self, axis=None, split_every=None, out=None):
from .reductions import argmin
return argmin(self, axis=axis, split_every=split_every, out=out)
- @wraps(np.argmax)
+ @derived_from(np.ndarray)
def argmax(self, axis=None, split_every=None, out=None):
from .reductions import argmax
return argmax(self, axis=axis, split_every=split_every, out=out)
- @wraps(np.sum)
+ @derived_from(np.ndarray)
def sum(self, axis=None, dtype=None, keepdims=False, split_every=None,
out=None):
from .reductions import sum
return sum(self, axis=axis, dtype=dtype, keepdims=keepdims,
split_every=split_every, out=out)
- @wraps(np.prod)
+ @derived_from(np.ndarray)
def prod(self, axis=None, dtype=None, keepdims=False, split_every=None,
out=None):
from .reductions import prod
return prod(self, axis=axis, dtype=dtype, keepdims=keepdims,
split_every=split_every, out=out)
- @wraps(np.mean)
+ @derived_from(np.ndarray)
def mean(self, axis=None, dtype=None, keepdims=False, split_every=None,
out=None):
from .reductions import mean
return mean(self, axis=axis, dtype=dtype, keepdims=keepdims,
split_every=split_every, out=out)
- @wraps(np.std)
+ @derived_from(np.ndarray)
def std(self, axis=None, dtype=None, keepdims=False, ddof=0,
split_every=None, out=None):
from .reductions import std
return std(self, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof,
split_every=split_every, out=out)
- @wraps(np.var)
+ @derived_from(np.ndarray)
def var(self, axis=None, dtype=None, keepdims=False, ddof=0,
split_every=None, out=None):
from .reductions import var
@@ -1664,8 +1684,9 @@ class Array(Base):
from .reductions import cumprod
return cumprod(self, axis, dtype, out=out)
- @wraps(squeeze)
+ @derived_from(np.ndarray)
def squeeze(self):
+ from .routines import squeeze
return squeeze(self)
def rechunk(self, chunks, threshold=None, block_size_limit=None):
@@ -1687,7 +1708,7 @@ class Array(Base):
from .ufunc import conj
return conj(self)
- @wraps(np.clip)
+ @derived_from(np.ndarray)
def clip(self, min=None, max=None):
from .ufunc import clip
return clip(self, min, max)
@@ -1731,12 +1752,14 @@ class Array(Base):
out._chunks = chunks
return out
- @wraps(np.swapaxes)
+ @derived_from(np.ndarray)
def swapaxes(self, axis1, axis2):
+ from .routines import swapaxes
return swapaxes(self, axis1, axis2)
- @wraps(np.round)
+ @derived_from(np.ndarray)
def round(self, decimals=0):
+ from .routines import round
return round(self, decimals=decimals)
def copy(self):
@@ -1764,10 +1787,16 @@ class Array(Base):
L = ndeepmap(self.ndim, lambda k: Delayed(k, dsk), self._keys())
return np.array(L, dtype=object)
- @wraps(np.repeat)
+ @derived_from(np.ndarray)
def repeat(self, repeats, axis=None):
+ from .creation import repeat
return repeat(self, repeats, axis=axis)
+ @derived_from(np.ndarray)
+ def nonzero(self):
+ from .routines import nonzero
+ return nonzero(self)
+
def ensure_int(f):
i = int(f)
... 11559 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/dask.git