[Python-modules-commits] [dask] 01/05: New upstream version 0.15.4

Diane Trout diane@moszumanska.debian.org
Fri Oct 13 22:39:44 UTC 2017


This is an automated email from the git hooks/post-receive script.

diane pushed a commit to branch master
in repository dask.

commit facf75c988c49e0ad9d7e174163d0865b0ddc9e4
Author: Diane Trout <diane@ghic.org>
Date:   Fri Oct 13 08:19:59 2017 -0700

    New upstream version 0.15.4
---
 .github/PULL_REQUEST_TEMPLATE.md             |    4 +
 .travis.yml                                  |   79 +-
 LICENSE.txt                                  |    8 +-
 continuous_integration/travis/install.sh     |   15 +-
 dask/array/__init__.py                       |   24 +-
 dask/array/chunk.py                          |    4 +
 dask/array/core.py                           | 1525 +++++++-------------------
 dask/array/creation.py                       |  463 +++++++-
 dask/array/ghost.py                          |    7 +-
 dask/array/learn.py                          |    9 +-
 dask/array/linalg.py                         |   70 +-
 dask/array/ma.py                             |  251 +++++
 dask/array/numpy_compat.py                   |    5 +
 dask/array/optimization.py                   |    2 -
 dask/array/percentile.py                     |    4 +-
 dask/array/rechunk.py                        |    6 +-
 dask/array/reductions.py                     |   83 +-
 dask/array/routines.py                       |  866 +++++++++++++++
 dask/array/slicing.py                        |  201 +++-
 dask/array/tests/test_array_core.py          |  870 +++------------
 dask/array/tests/test_creation.py            |  198 +++-
 dask/array/tests/test_fft.py                 |   11 +-
 dask/array/tests/test_ghost.py               |   21 +-
 dask/array/tests/test_linalg.py              |  101 +-
 dask/array/tests/test_masked.py              |  315 ++++++
 dask/array/tests/test_percentiles.py         |   12 +-
 dask/array/tests/test_rechunk.py             |   29 +-
 dask/array/tests/test_reductions.py          |   27 +-
 dask/array/tests/test_routines.py            |  899 +++++++++++++++
 dask/array/tests/test_slicing.py             |  115 +-
 dask/array/tests/test_testing.py             |   19 +
 dask/array/ufunc.py                          |    1 -
 dask/array/utils.py                          |   17 +-
 dask/bag/core.py                             |   81 +-
 dask/bag/tests/test_bag.py                   |   96 +-
 dask/base.py                                 |    9 +-
 dask/bytes/core.py                           |    5 +-
 dask/bytes/tests/test_s3.py                  |    2 +-
 dask/compatibility.py                        |    3 -
 dask/core.py                                 |   11 +-
 dask/dataframe/__init__.py                   |    3 +-
 dask/dataframe/accessor.py                   |   35 +-
 dask/dataframe/categorical.py                |    1 -
 dask/dataframe/core.py                       |  155 ++-
 dask/dataframe/groupby.py                    |  207 +++-
 dask/dataframe/indexing.py                   |   32 +-
 dask/dataframe/io/csv.py                     |   75 +-
 dask/dataframe/io/demo.py                    |    2 +
 dask/dataframe/io/hdf.py                     |   41 +-
 dask/dataframe/io/io.py                      |   21 +-
 dask/dataframe/io/parquet.py                 |   69 +-
 dask/dataframe/io/sql.py                     |    4 +-
 dask/dataframe/io/tests/test_csv.py          |   62 +-
 dask/dataframe/io/tests/test_hdf.py          |   24 +
 dask/dataframe/io/tests/test_io.py           |   29 +-
 dask/dataframe/io/tests/test_parquet.py      |   80 ++
 dask/dataframe/methods.py                    |   27 +-
 dask/dataframe/partitionquantiles.py         |    7 +-
 dask/dataframe/shuffle.py                    |    6 +-
 dask/dataframe/tests/test_categorical.py     |   37 +-
 dask/dataframe/tests/test_dataframe.py       |  130 ++-
 dask/dataframe/tests/test_format.py          |  113 +-
 dask/dataframe/tests/test_groupby.py         |  196 +++-
 dask/dataframe/tests/test_indexing.py        |    6 +
 dask/dataframe/tests/test_rolling.py         |    5 +-
 dask/dataframe/tests/test_shuffle.py         |   39 +-
 dask/dataframe/tests/test_utils_dataframe.py |   71 +-
 dask/dataframe/tseries/resample.py           |    2 +-
 dask/dataframe/utils.py                      |   77 +-
 dask/delayed.py                              |   18 +-
 dask/diagnostics/profile.py                  |    8 +-
 dask/diagnostics/profile_visualize.py        |    6 +-
 dask/diagnostics/tests/test_profiler.py      |   40 +-
 dask/optimize.py                             |   11 +-
 dask/rewrite.py                              |    2 +-
 dask/tests/test_base.py                      |    2 +-
 dask/tests/test_core.py                      |    4 +-
 dask/tests/test_distributed.py               |   25 +-
 dask/tests/test_utils.py                     |   41 +-
 dask/utils.py                                |   91 +-
 docs/source/array-api.rst                    |   71 ++
 docs/source/array-sparse.rst                 |   11 +-
 docs/source/changelog.rst                    |  120 +-
 docs/source/conf.py                          |    6 +-
 docs/source/custom-graphs.rst                |    2 +-
 docs/source/daskcheatsheet.pdf               |  Bin 262284 -> 203552 bytes
 docs/source/dataframe-api.rst                |  117 +-
 docs/source/dataframe-groupby.rst            |   67 +-
 docs/source/dataframe-overview.rst           |    4 +-
 docs/source/develop.rst                      |    9 +-
 docs/source/funding.rst                      |    4 +-
 docs/source/index.rst                        |   13 +-
 docs/source/install.rst                      |    4 +-
 docs/source/support.rst                      |   15 +-
 setup.cfg                                    |    3 +
 setup.py                                     |   10 +-
 96 files changed, 6150 insertions(+), 2578 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..d48dfb2
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,4 @@
+- [ ] Tests added / passed
+- [ ] Passes `flake8 dask`
+- [ ] Fully documented, including `docs/source/changelog.rst` for all changes
+      and one of the `docs/source/*-api.rst` files for new API
diff --git a/.travis.yml b/.travis.yml
index ea6c0bc..fa7761c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,32 +1,79 @@
 language: generic
 sudo: false
 dist: trusty
+os: linux
 
-env:
-  matrix:
-    - PYTHON=2.7 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='true' PARALLEL='false' TEST_IMPORTS='false' XTRATESTARGS=
-    - PYTHON=2.7 NUMPY=1.13.0 PANDAS=0.20.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
-    - PYTHON=3.4 NUMPY=1.10.4 PANDAS=0.19.1 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
-    - PYTHON=3.5 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' XTRATESTARGS=
-    - PYTHON=3.6 NUMPY=1.13.0 PANDAS=0.20.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
+_base_envs:
+  - &coverage COVERAGE='true' PARALLEL='false'
+  - &no_coverage COVERAGE='false' PARALLEL='true'
+  - &optimize PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
+  - &no_optimize XTRATESTARGS=
+  - &imports TEST_IMPORTS='true'
+  - &no_imports TEST_IMPORTS='false'
 
-matrix:
+jobs:
   fast_finish: true
   include:
-  - os: osx
-    env: PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
-  # Together with fast_finish, allow build to be marked successful before the OS X job finishes
+    - env:
+      - PYTHON=2.7
+      - NUMPY=1.12.1
+      - PANDAS=0.19.2
+      - *coverage
+      - *no_optimize
+      - *no_imports
+
+    - env:
+      - PYTHON=2.7
+      - NUMPY=1.13.0
+      - PANDAS=0.20.2
+      - *no_coverage
+      - *optimize
+      - *no_imports
+
+    - env:
+      - PYTHON=3.4
+      - NUMPY=1.10.4
+      - PANDAS=0.19.1
+      - *no_coverage
+      - *optimize
+      - *no_imports
+      if: type != pull_request
+
+    - env:
+      - PYTHON=3.5
+      - NUMPY=1.12.1
+      - PANDAS=0.19.2
+      - *no_coverage
+      - *no_optimize
+      - *no_imports
+
+    - env: &py36_env
+      - PYTHON=3.6
+      - NUMPY=1.13.0
+      - PANDAS=0.20.2
+      - *no_coverage
+      - *no_optimize
+      - *imports
+
+    - env: *py36_env
+      if: type != pull_request
+      os: osx
+
   allow_failures:
-  - os: osx
-    # This needs to be the exact same line as above
-    env: PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
+    - os: osx
+
+
+install:
+  - source continuous_integration/travis/install.sh
 
-install: source continuous_integration/travis/install.sh
 script:
   - source continuous_integration/travis/run_tests.sh
   - flake8 dask
+  - pycodestyle --select=E722 dask  # check for bare `except:` blocks
   - if [[ $TEST_IMPORTS == 'true' ]]; then source continuous_integration/travis/test_imports.sh; fi
-after_success: source continuous_integration/travis/after_success.sh
+
+after_success:
+  - source continuous_integration/travis/after_success.sh
 
 notifications:
   email: false
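
Note: the rewrite above deduplicates the build matrix with YAML anchors
(&name) and aliases (*name): each settings group is defined once under
_base_envs and re-inserted into every job's env list. A minimal sketch of
how a parser expands them (assumes PyYAML; illustration only, not part of
this commit):

    from textwrap import dedent
    import yaml  # assumption: PyYAML is available

    doc = """
    _base_envs:
      - &coverage COVERAGE='true' PARALLEL='false'
    jobs:
      - env:
          - PYTHON=2.7
          - *coverage
    """
    # The alias re-inserts the anchored scalar wherever it appears:
    print(yaml.safe_load(dedent(doc))['jobs'][0]['env'])
    # ['PYTHON=2.7', "COVERAGE='true' PARALLEL='false'"]
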
diff --git a/LICENSE.txt b/LICENSE.txt
index 0c01700..5f68378 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (c) 2014-2015, Continuum Analytics, Inc. and contributors
+Copyright (c) 2014-2017, Anaconda, Inc. and contributors
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification,
@@ -11,9 +11,9 @@ Redistributions in binary form must reproduce the above copyright notice,
 this list of conditions and the following disclaimer in the documentation
 and/or other materials provided with the distribution.
 
-Neither the name of Continuum Analytics nor the names of any contributors
-may be used to endorse or promote products derived from this software
-without specific prior written permission.
+Neither the name of Anaconda nor the names of any contributors may be used to
+endorse or promote products derived from this software without specific prior
+written permission.
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
diff --git a/continuous_integration/travis/install.sh b/continuous_integration/travis/install.sh
index d3bb215..21024ae 100644
--- a/continuous_integration/travis/install.sh
+++ b/continuous_integration/travis/install.sh
@@ -1,10 +1,10 @@
 # Install conda
 case "$(uname -s)" in
     'Darwin')
-        MINICONDA_FILENAME="Miniconda3-latest-MacOSX-x86_64.sh"
+        MINICONDA_FILENAME="Miniconda3-4.3.21-MacOSX-x86_64.sh"
         ;;
     'Linux')
-        MINICONDA_FILENAME="Miniconda3-latest-Linux-x86_64.sh"
+        MINICONDA_FILENAME="Miniconda3-4.3.21-Linux-x86_64.sh"
         ;;
     *)  ;;
 esac
@@ -43,7 +43,6 @@ conda install -q -c conda-forge \
     ipython \
     partd \
     psutil \
-    pytables \
     "pytest<=3.1.1" \
     scikit-image \
     scikit-learn \
@@ -51,6 +50,9 @@ conda install -q -c conda-forge \
     sqlalchemy \
     toolz
 
+# install pytables from defaults for now
+conda install -q pytables
+
 pip install -q git+https://github.com/dask/partd --upgrade --no-deps
 pip install -q git+https://github.com/dask/zict --upgrade --no-deps
 pip install -q git+https://github.com/dask/distributed --upgrade --no-deps
@@ -79,9 +81,10 @@ pip install -q \
     mmh3 \
     pandas_datareader \
     pytest-xdist \
-    xxhash
+    xxhash \
+    pycodestyle
 
 # Install dask
 pip install -q --no-deps -e .[complete]
-echo pip freeze
-pip freeze
+echo conda list
+conda list
diff --git a/dask/array/__init__.py b/dask/array/__init__.py
index 483dbd3..9155423 100644
--- a/dask/array/__init__.py
+++ b/dask/array/__init__.py
@@ -1,14 +1,16 @@
 from __future__ import absolute_import, division, print_function
 
 from ..utils import ignoring
-from .core import (Array, stack, concatenate, take, tensordot, transpose,
-        from_array, choose, where, coarsen, insert, broadcast_to, ravel,
-        roll, fromfunction, unique, store, squeeze, topk, bincount, tile,
-        digitize, histogram, map_blocks, atop, to_hdf5, dot, cov, array,
-        dstack, vstack, hstack, to_npy_stack, from_npy_stack, compress,
-        from_delayed, round, swapaxes, repeat, asarray, asanyarray)
-from .core import (around, isnull, notnull, isclose, eye, triu,
-                   tril, diag, corrcoef)
+from .core import (Array, concatenate, stack, from_array, store, map_blocks,
+                   atop, to_hdf5, to_npy_stack, from_npy_stack, from_delayed,
+                   asarray, asanyarray, broadcast_to)
+from .routines import (take, choose, argwhere, where, coarsen, insert,
+                       ravel, roll, unique, squeeze, topk, ptp, diff, ediff1d,
+                       bincount, digitize, histogram, cov, array, dstack,
+                       vstack, hstack, compress, extract, round, count_nonzero,
+                       flatnonzero, nonzero, around, isnull, notnull, isclose,
+                       corrcoef, swapaxes, tensordot, transpose, dot,
+                       apply_along_axis, apply_over_axes, result_type)
 from .reshape import reshape
 from .ufunc import (add, subtract, multiply, divide, logaddexp, logaddexp2,
         true_divide, floor_divide, negative, power, remainder, mod, conj, exp,
@@ -30,10 +32,14 @@ from .reductions import (sum, prod, mean, std, var, any, all, min, max, vnorm,
 from .percentile import percentile
 with ignoring(ImportError):
     from .reductions import nanprod, nancumprod, nancumsum
+with ignoring(ImportError):
+    from . import ma
 from . import random, linalg, ghost, learn, fft
 from .wrap import ones, zeros, empty, full
+from .creation import ones_like, zeros_like, empty_like, full_like
 from .rechunk import rechunk
 from ..context import set_options
 from ..base import compute
 from .optimization import optimize
-from .creation import arange, linspace, indices
+from .creation import (arange, linspace, indices, diag, eye, triu, tril,
+                       fromfunction, tile, repeat)
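
Note: the import reshuffle above reflects this release's split of
dask/array/core.py into routines.py and creation.py. The names are
re-exported here, so user code keeps working against the dask.array
namespace. A quick illustration (not part of this commit):

    import dask.array as da

    x = da.arange(10, chunks=5)
    # `where` now lives in dask/array/routines.py and `tile` in
    # dask/array/creation.py, but both still resolve at the top level:
    y = da.where(x % 2 == 0, x, 0)
    z = da.tile(x, 2)
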
diff --git a/dask/array/chunk.py b/dask/array/chunk.py
index f5e2d73..7bc6c72 100644
--- a/dask/array/chunk.py
+++ b/dask/array/chunk.py
@@ -191,3 +191,7 @@ def topk(k, x):
 def arange(start, stop, step, length, dtype):
     res = np.arange(start, stop, step, dtype)
     return res[:-1] if len(res) > length else res
+
+
+def astype(x, astype_dtype=None, **kwargs):
+    return x.astype(astype_dtype, **kwargs)
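
Note: chunk.astype is a tiny helper that forwards a target dtype to
ndarray.astype; Array.astype (changed later in this commit) applies it
block-wise through map_blocks. Illustration only:

    import numpy as np
    from dask.array import chunk

    block = np.arange(4)
    chunk.astype(block, astype_dtype='float32')
    # -> array([ 0.,  1.,  2.,  3.], dtype=float32)
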
diff --git a/dask/array/core.py b/dask/array/core.py
index d7d5ac3..de1df97 100644
--- a/dask/array/core.py
+++ b/dask/array/core.py
@@ -4,10 +4,9 @@ from bisect import bisect
 from collections import Iterable, Mapping
 from collections import Iterator
 from functools import partial, wraps
-import inspect
 from itertools import product
 import math
-from numbers import Integral, Number
+from numbers import Number
 import operator
 from operator import add, getitem, mul
 import os
@@ -19,25 +18,24 @@ import uuid
 import warnings
 
 try:
-    from cytoolz.curried import (partition, concat, pluck, join, first,
-                                 groupby, valmap, accumulate, interleave,
-                                 sliding_window, assoc)
+    from cytoolz import (partition, concat, join, first,
+                         groupby, valmap, accumulate, assoc)
+    from cytoolz.curried import filter, pluck
 
 except ImportError:
-    from toolz.curried import (partition, concat, pluck, join, first,
-                               groupby, valmap, accumulate,
-                               interleave, sliding_window, assoc)
+    from toolz import (partition, concat, join, first,
+                       groupby, valmap, accumulate, assoc)
+    from toolz.curried import filter, pluck
 from toolz import pipe, map, reduce
 import numpy as np
 
 from . import chunk
-from .slicing import slice_array
-from . import numpy_compat
+from .slicing import slice_array, replace_ellipsis
 from ..base import Base, tokenize, normalize_token
 from ..context import _globals
 from ..utils import (homogeneous_deepmap, ndeepmap, ignoring, concrete,
                      is_integer, IndexCallable, funcname, derived_from,
-                     SerializableLock, ensure_dict, package_of)
+                     SerializableLock, ensure_dict, Dispatch)
 from ..compatibility import unicode, long, getargspec, zip_longest, apply
 from ..delayed import to_task_dask
 from .. import threaded, core
@@ -45,6 +43,20 @@ from .. import sharedict
 from ..sharedict import ShareDict
 
 
+concatenate_lookup = Dispatch('concatenate')
+tensordot_lookup = Dispatch('tensordot')
+concatenate_lookup.register((object, np.ndarray), np.concatenate)
+tensordot_lookup.register((object, np.ndarray), np.tensordot)
+
+
+@tensordot_lookup.register_lazy('sparse')
+@concatenate_lookup.register_lazy('sparse')
+def register_sparse():
+    import sparse
+    concatenate_lookup.register(sparse.COO, sparse.concatenate)
+    tensordot_lookup.register(sparse.COO, sparse.tensordot)
+
+
 def getter(a, b, asarray=True, lock=None):
     if isinstance(b, tuple) and any(x is None for x in b):
         b2 = tuple(x for x in b if x is not None)
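
Note: concatenate_lookup and tensordot_lookup centralize the choice of
concatenate/tensordot implementation per block type, and register_lazy
defers importing an optional backend (here, sparse) until a block of its
type is actually seen. A minimal sketch of the lookup pattern (hypothetical
Lookup class, not dask.utils.Dispatch verbatim):

    class Lookup(object):
        """Type-based function registry with lazily imported backends."""

        def __init__(self):
            self._registry = {}  # type -> implementation
            self._lazy = {}      # top-level module name -> registration hook

        def register(self, typ, func):
            self._registry[typ] = func

        def register_lazy(self, module_name, func):
            self._lazy[module_name] = func

        def dispatch(self, typ):
            # Run the deferred registration the first time we meet a type
            # defined in a lazily registered module.
            module = typ.__module__.split('.')[0]
            if module in self._lazy:
                self._lazy.pop(module)()
            for cls in typ.__mro__:
                if cls in self._registry:
                    return self._registry[cls]
            raise TypeError("no implementation registered for %s" % typ)
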
@@ -269,8 +281,9 @@ def broadcast_dimensions(argpairs, numblocks, sentinels=(1, (1,)),
     {'i': 'Hello', 'j': (2, 3)}
     """
     # List like [('i', 2), ('j', 1), ('i', 1), ('j', 2)]
+    argpairs2 = [(a, ind) for a, ind in argpairs if ind is not None]
     L = concat([zip(inds, dims) for (x, inds), (x, dims)
-                in join(first, argpairs, first, numblocks.items())])
+                in join(first, argpairs2, first, numblocks.items())])
 
     g = groupby(0, L)
     g = dict((k, set([d for i, d in v])) for k, v in g.items())
@@ -380,6 +393,13 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
     {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
      ('z', 1): (apply, f, [('x', 1)], {'b': 10})}
 
+    Include literals by indexing with ``None``
+
+    >>> top(add, 'z', 'i', 'x', 'i', 100, None,  numblocks={'x': (2,)})  # doctest: +SKIP
+    {('z', 0): (add, ('x', 0), 100),
+     ('z', 1): (add, ('x', 1), 100)}
+
+
     See Also
     --------
     atop
@@ -389,9 +409,9 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
     new_axes = kwargs.pop('new_axes', {})
     argpairs = list(partition(2, arrind_pairs))
 
-    assert set(numblocks) == set(pluck(0, argpairs))
+    assert set(numblocks) == {name for name, ind in argpairs if ind is not None}
 
-    all_indices = pipe(argpairs, pluck(1), concat, set)
+    all_indices = pipe(argpairs, pluck(1), filter(None), concat, set)
     dummy_indices = all_indices - set(out_indices)
 
     # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
@@ -407,20 +427,33 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
     # {j: [1, 2, 3], ...}  For j a dummy index of dimension 3
     dummies = dict((i, list(range(dims[i]))) for i in dummy_indices)
 
+    dsk = {}
+
+    # Unpack dask values in non-array arguments
+    for i, (arg, ind) in enumerate(argpairs):
+        if ind is None:
+            arg2, dsk2 = to_task_dask(arg)
+            if dsk2:
+                dsk.update(ensure_dict(dsk2))
+                argpairs[i] = (arg2, ind)
+
     # Create argument lists
     valtups = []
     for kd in keydicts:
         args = []
         for arg, ind in argpairs:
-            tups = lol_tuples((arg,), ind, kd, dummies)
-            if any(nb == 1 for nb in numblocks[arg]):
-                tups2 = zero_broadcast_dimensions(tups, numblocks[arg])
+            if ind is None:
+                args.append(arg)
             else:
-                tups2 = tups
-            if concatenate and isinstance(tups2, list):
-                axes = [n for n, i in enumerate(ind) if i in dummies]
-                tups2 = (concatenate_axes, tups2, axes)
-            args.append(tups2)
+                tups = lol_tuples((arg,), ind, kd, dummies)
+                if any(nb == 1 for nb in numblocks[arg]):
+                    tups2 = zero_broadcast_dimensions(tups, numblocks[arg])
+                else:
+                    tups2 = tups
+                if concatenate and isinstance(tups2, list):
+                    axes = [n for n, i in enumerate(ind) if i in dummies]
+                    tups2 = (concatenate_axes, tups2, axes)
+                args.append(tups2)
         valtups.append(args)
 
     if not kwargs:  # will not be used in an apply, should be a tuple
@@ -429,7 +462,6 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
     # Add heads to tuples
     keys = [(output,) + kt for kt in keytups]
 
-    dsk = {}
     # Unpack delayed objects in kwargs
     if kwargs:
         task, dsk2 = to_task_dask(kwargs)
@@ -481,8 +513,8 @@ def _concatenate2(arrays, axes=[]):
         return arrays
     if len(axes) > 1:
         arrays = [_concatenate2(a, axes=axes[1:]) for a in arrays]
-    module = package_of(type(max(arrays, key=lambda x: x.__array_priority__))) or np
-    return module.concatenate(arrays, axis=axes[0])
+    concatenate = concatenate_lookup.dispatch(type(max(arrays, key=lambda x: x.__array_priority__)))
+    return concatenate(arrays, axis=axes[0])
 
 
 def apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype=True):
@@ -637,9 +669,11 @@ def map_blocks(func, *args, **kwargs):
         new_axis = [new_axis]
 
     arrs = [a for a in args if isinstance(a, Array)]
-    other = [(i, a) for i, a in enumerate(args) if not isinstance(a, Array)]
 
-    argpairs = [(a.name, tuple(range(a.ndim))[::-1]) for a in arrs]
+    argpairs = [(a.name, tuple(range(a.ndim))[::-1])
+                if isinstance(a, Array)
+                else (a, None)
+                for a in args]
     numblocks = {a.name: a.numblocks for a in arrs}
     arginds = list(concat(argpairs))
     out_ind = tuple(range(max(a.ndim for a in arrs)))[::-1]
@@ -648,19 +682,14 @@ def map_blocks(func, *args, **kwargs):
         spec = getargspec(func)
         block_id = ('block_id' in spec.args or
                     'block_id' in getattr(spec, 'kwonly_args', ()))
-    except:
+    except Exception:
         block_id = False
 
     if block_id:
         kwargs['block_id'] = '__dummy__'
 
-    if other:
-        dsk = top(partial_by_order, name, out_ind, *arginds,
-                  numblocks=numblocks, function=func, other=other,
-                  **kwargs)
-    else:
-        dsk = top(func, name, out_ind, *arginds, numblocks=numblocks,
-                  **kwargs)
+    dsk = top(func, name, out_ind, *arginds, numblocks=numblocks,
+              **kwargs)
 
     # If func has block_id as an argument, add it to the kwargs for each call
     if block_id:
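
Note: with top now accepting a None index to mark literals, map_blocks can
hand non-array arguments straight to every block call instead of routing
them through partial_by_order. Illustration only, not part of this commit:

    import numpy as np
    import dask.array as da

    x = da.ones((4,), chunks=2)
    # 100 is not a dask Array, so it is forwarded to np.add as a literal
    y = x.map_blocks(np.add, 100, dtype=x.dtype)
    y.compute()
    # -> array([ 101.,  101.,  101.,  101.])
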
@@ -725,7 +754,7 @@ def map_blocks(func, *args, **kwargs):
         else:
             try:
                 chunks2 = list(broadcast_chunks(*[a.chunks for a in arrs]))
-            except:
+            except Exception:
                 raise ValueError("Arrays in `map_blocks` don't align, can't "
                                  "infer output chunks. Please provide "
                                  "`chunks` kwarg.")
@@ -783,56 +812,6 @@ def broadcast_chunks(*chunkss):
     return tuple(result)
 
 
-@wraps(np.squeeze)
-def squeeze(a, axis=None):
-    if 1 not in a.shape:
-        return a
-    if axis is None:
-        axis = tuple(i for i, d in enumerate(a.shape) if d == 1)
-    b = a.map_blocks(partial(np.squeeze, axis=axis), dtype=a.dtype)
-    chunks = tuple(bd for bd in b.chunks if bd != (1,))
-
-    name = 'squeeze-' + tokenize(a, axis)
-    old_keys = list(product([b.name], *[range(len(bd)) for bd in b.chunks]))
-    new_keys = list(product([name], *[range(len(bd)) for bd in chunks]))
-
-    dsk = {n: b.dask[o] for o, n in zip(old_keys, new_keys)}
-
-    return Array(sharedict.merge(b.dask, (name, dsk)), name, chunks, dtype=a.dtype)
-
-
-def topk(k, x):
-    """ The top k elements of an array
-
-    Returns the k greatest elements of the array in sorted order.  Only works
-    on arrays of a single dimension.
-
-    This assumes that ``k`` is small.  All results will be returned in a single
-    chunk.
-
-    Examples
-    --------
-
-    >>> x = np.array([5, 1, 3, 6])
-    >>> d = from_array(x, chunks=2)
-    >>> d.topk(2).compute()
-    array([6, 5])
-    """
-    if x.ndim != 1:
-        raise ValueError("Topk only works on arrays of one dimension")
-
-    token = tokenize(k, x)
-    name = 'chunk.topk-' + token
-    dsk = dict(((name, i), (chunk.topk, k, key))
-               for i, key in enumerate(x._keys()))
-    name2 = 'topk-' + token
-    dsk[(name2, 0)] = (getitem, (np.sort, (np.concatenate, list(dsk))),
-                       slice(-1, -k - 1, -1))
-    chunks = ((k,),)
-
-    return Array(sharedict.merge((name2, dsk), x.dask), name2, chunks, dtype=x.dtype)
-
-
 def store(sources, targets, lock=True, regions=None, compute=True, **kwargs):
     """ Store dask arrays in array-like objects, overwrite data in target
 
@@ -963,6 +942,15 @@ def finalize(results):
     return unpack_singleton(results)
 
 
+CHUNKS_NONE_ERROR_MESSAGE = """
+You must specify a chunks= keyword argument.
+This specifies the chunksize of your array blocks.
+
+See the following documentation page for details:
+  http://dask.pydata.org/en/latest/array-creation.html#chunks
+""".strip()
+
+
 class Array(Base):
     """ Parallel Dask Array
 
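
Note: the message above replaces the old lowercase chunks_none_error_message
and fires whenever an Array is built without chunk information. Supplying
chunks= avoids it (illustration only):

    import numpy as np
    import dask.array as da

    data = np.ones((1000, 1000))
    x = da.from_array(data, chunks=(250, 250))  # 4 x 4 grid of 250x250 blocks
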
@@ -987,7 +975,7 @@ class Array(Base):
     --------
     dask.array.from_array
     """
-    __slots__ = 'dask', 'name', '_chunks', 'dtype'
+    __slots__ = 'dask', '_name', '_cached_keys', '_chunks', 'dtype'
 
     _optimize = staticmethod(optimize)
     _default_get = staticmethod(threaded.get)
@@ -1004,7 +992,7 @@ class Array(Base):
         self.name = name
         self._chunks = normalize_chunks(chunks, shape)
         if self._chunks is None:
-            raise ValueError(chunks_none_error_message)
+            raise ValueError(CHUNKS_NONE_ERROR_MESSAGE)
         if dtype is None:
             raise ValueError("You must specify the dtype of the array")
         self.dtype = np.dtype(dtype)
@@ -1103,12 +1091,20 @@ class Array(Base):
         """ Length of one array element in bytes """
         return self.dtype.itemsize
 
+    @property
+    def name(self):
+        return self._name
+
+    @name.setter
+    def name(self, val):
+        self._name = val
+        # Clear the key cache when the name is reset
+        self._cached_keys = None
+
     def _keys(self, *args):
         if not args:
-            try:
+            if self._cached_keys is not None:
                 return self._cached_keys
-            except AttributeError:
-                pass
 
         if not self.chunks:
             return [(self.name,)]
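
Note: _keys() embeds the array name in every key, so the cache is only valid
while the name is unchanged; making name a property lets the setter drop the
cache. The pattern in isolation (hypothetical sketch, not dask's class
verbatim):

    class Named(object):
        def __init__(self, name):
            self._cached_keys = None
            self._name = name

        @property
        def name(self):
            return self._name

        @name.setter
        def name(self, val):
            self._name = val
            self._cached_keys = None  # cached keys reference the old name
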
@@ -1187,6 +1183,7 @@ class Array(Base):
         return complex(self.compute())
 
     def __setitem__(self, key, value):
+        from .routines import where
         if isinstance(key, Array):
             if isinstance(value, Array) and value.ndim > 1:
                 raise ValueError('boolean index array should have 1 dimension')
@@ -1200,12 +1197,11 @@ class Array(Base):
                                       % type(key))
 
     def __getitem__(self, index):
-        out = 'getitem-' + tokenize(self, index)
-
         # Field access, e.g. x['a'] or x[['a', 'b']]
         if (isinstance(index, (str, unicode)) or
                 (isinstance(index, list) and index and
                  all(isinstance(i, (str, unicode)) for i in index))):
+            out = 'getitem-' + tokenize(self, index)
             if isinstance(index, (str, unicode)):
                 dt = self.dtype[index]
             else:
@@ -1219,55 +1215,67 @@ class Array(Base):
             else:
                 return self.map_blocks(getitem, index, dtype=dt, name=out)
 
-        # Slicing
-        if isinstance(index, Array):
-            return slice_with_dask_array(self, index)
-
         if not isinstance(index, tuple):
             index = (index,)
 
-        if any(isinstance(i, Array) for i in index):
-            raise NotImplementedError("Indexing with a dask Array")
+        from .slicing import normalize_index, slice_with_dask_array
+        index2 = normalize_index(index, self.shape)
+
+        if any(isinstance(i, Array) for i in index2):
+            self, index2 = slice_with_dask_array(self, index2)
 
-        if all(isinstance(i, slice) and i == slice(None) for i in index):
+        if all(isinstance(i, slice) and i == slice(None) for i in index2):
             return self
 
-        dsk, chunks = slice_array(out, self.name, self.chunks, index)
+        out = 'getitem-' + tokenize(self, index2)
+        dsk, chunks = slice_array(out, self.name, self.chunks, index2)
 
         dsk2 = sharedict.merge(self.dask, (out, dsk))
 
         return Array(dsk2, out, chunks, dtype=self.dtype)
 
     def _vindex(self, key):
-        if (not isinstance(key, tuple) or
-           not len([k for k in key if isinstance(k, (np.ndarray, list))]) >= 2 or
-           not all(isinstance(k, (np.ndarray, list)) or k == slice(None, None)
-                   for k in key)):
-            msg = ("vindex expects only lists and full slices\n"
-                   "At least two entries must be a list\n"
-                   "For other combinations try doing normal slicing first, followed\n"
-                   "by vindex slicing.  Got: \n\t%s")
-            raise IndexError(msg % str(key))
-        if any((isinstance(k, np.ndarray) and k.ndim != 1) or
-               (isinstance(k, list) and k and isinstance(k[0], list))
-               for k in key):
-            raise IndexError("vindex does not support multi-dimensional keys\n"
-                             "Got: %s" % str(key))
-        if len(set(len(k) for k in key if isinstance(k, (list, np.ndarray)))) != 1:
-            raise IndexError("All indexers must have the same length, got\n"
-                             "\t%s" % str(key))
-        key = key + (slice(None, None),) * (self.ndim - len(key))
-        key = [i if isinstance(i, list) else
-               i.tolist() if isinstance(i, np.ndarray) else
-               None for i in key]
+        if not isinstance(key, tuple):
+            key = (key,)
+        if any(k is None for k in key):
+            raise IndexError(
+                "vindex does not support indexing with None (np.newaxis), "
+                "got {}".format(key))
+        if all(isinstance(k, slice) for k in key):
+            raise IndexError(
+                "vindex requires at least one non-slice to vectorize over. "
+                "Use normal slicing instead when only using slices. Got: {}"
+                .format(key))
         return _vindex(self, *key)
 
     @property
     def vindex(self):
+        """Vectorized indexing with broadcasting.
+
+        This is equivalent to numpy's advanced indexing, using arrays that are
+        broadcast against each other. This allows for pointwise indexing:
+
+        >>> x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+        >>> x = from_array(x, chunks=2)
+        >>> x.vindex[[0, 1, 2], [0, 1, 2]].compute()
+        array([1, 5, 9])
+
+        Mixed basic/advanced indexing with slices/arrays is also supported. The
+        order of dimensions in the result follows those proposed for
+        ndarray.vindex [1]_: the subspace spanned by arrays is followed by all
+        slices.
+
+        Note: ``vindex`` provides more general functionality than standard
+        indexing, but it also has fewer optimizations and can be significantly
+        slower.
+
+    .. [1] https://github.com/numpy/numpy/pull/6256
+        """
         return IndexCallable(self._vindex)
 
-    @wraps(np.dot)
+    @derived_from(np.ndarray)
     def dot(self, other):
+        from .routines import tensordot
         return tensordot(self, other,
                          axes=((self.ndim - 1,), (other.ndim - 2,)))
 
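
Note: unlike chained basic fancy indexing, which selects the outer product
of the index arrays, vindex pairs them up elementwise. Illustration only,
not part of this commit:

    import numpy as np
    import dask.array as da

    x = da.from_array(np.arange(9).reshape(3, 3), chunks=2)
    x[[0, 2]][:, [0, 2]].compute()      # outer product: the 2 x 2 corner
    x.vindex[[0, 2], [0, 2]].compute()  # pointwise pairs: array([0, 8])
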
@@ -1277,31 +1285,41 @@ class Array(Base):
 
     @property
     def T(self):
-        return transpose(self)
+        return self.transpose()
 
     @derived_from(np.ndarray)
     def transpose(self, *axes):
+        from .routines import transpose
         if not axes:
             axes = None
         elif len(axes) == 1 and isinstance(axes[0], Iterable):
             axes = axes[0]
         return transpose(self, axes=axes)
 
-    @wraps(np.ravel)
+    @derived_from(np.ndarray)
     def ravel(self):
+        from .routines import ravel
         return ravel(self)
 
     flatten = ravel
 
-    @wraps(np.reshape)
+    @derived_from(np.ndarray)
+    def choose(self, choices):
+        from .routines import choose
+        return choose(self, choices)
+
+    @derived_from(np.ndarray)
     def reshape(self, *shape):
         from .reshape import reshape
         if len(shape) == 1 and not isinstance(shape[0], Number):
             shape = shape[0]
         return reshape(self, shape)
 
-    @wraps(topk)
     def topk(self, k):
+        """The top k elements of an array.
+
+        See ``da.topk`` for docstring"""
+        from .routines import topk
         return topk(k, self)
 
     def astype(self, dtype, **kwargs):
@@ -1342,7 +1360,7 @@ class Array(Base):
                             " according to the rule "
                             "{2!r}".format(self.dtype, dtype, casting))
         name = 'astype-' + tokenize(self, dtype, casting, copy)
-        return self.map_blocks(_astype, dtype=dtype, name=name,
+        return self.map_blocks(chunk.astype, dtype=dtype, name=name,
                                astype_dtype=dtype, **kwargs)
 
     def __abs__(self):
@@ -1454,6 +1472,7 @@ class Array(Base):
         return elemwise(operator.xor, other, self)
 
     def __matmul__(self, other):
+        from .routines import tensordot
         if not hasattr(other, 'ndim'):
             other = np.asarray(other)  # account for array-like RHS
         if other.ndim > 2:
@@ -1466,6 +1485,7 @@ class Array(Base):
                                             (other.ndim - 2,)))
 
     def __rmatmul__(self, other):
+        from .routines import tensordot
         if not hasattr(other, 'ndim'):
             other = np.asarray(other)  # account for array-like on LHS
         if self.ndim > 2:
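
Note: for one- and two-dimensional operands, the matmul operator therefore
reduces to a tensordot over the last axis of the left argument and the
second-to-last axis of the right one. Illustration only (Python 3, where the
@ operator exists):

    import numpy as np
    import dask.array as da

    a = da.from_array(np.eye(3), chunks=3)
    b = da.from_array(np.arange(9.).reshape(3, 3), chunks=3)
    (a @ b).compute()
    # equivalent to:
    da.tensordot(a, b, axes=((1,), (0,))).compute()
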
@@ -1477,69 +1497,69 @@ class Array(Base):
         return tensordot(other, self, axes=((other.ndim - 1,),
                                             (self.ndim - 2,)))
 
-    @wraps(np.any)
+    @derived_from(np.ndarray)
     def any(self, axis=None, keepdims=False, split_every=None, out=None):
         from .reductions import any
         return any(self, axis=axis, keepdims=keepdims, split_every=split_every,
                    out=out)
 
-    @wraps(np.all)
+    @derived_from(np.ndarray)
     def all(self, axis=None, keepdims=False, split_every=None, out=None):
         from .reductions import all
         return all(self, axis=axis, keepdims=keepdims, split_every=split_every,
                    out=out)
 
-    @wraps(np.min)
+    @derived_from(np.ndarray)
     def min(self, axis=None, keepdims=False, split_every=None, out=None):
         from .reductions import min
         return min(self, axis=axis, keepdims=keepdims, split_every=split_every,
                    out=out)
 
-    @wraps(np.max)
+    @derived_from(np.ndarray)
     def max(self, axis=None, keepdims=False, split_every=None, out=None):
         from .reductions import max
         return max(self, axis=axis, keepdims=keepdims, split_every=split_every,
                    out=out)
 
-    @wraps(np.argmin)
+    @derived_from(np.ndarray)
     def argmin(self, axis=None, split_every=None, out=None):
         from .reductions import argmin
         return argmin(self, axis=axis, split_every=split_every, out=out)
 
-    @wraps(np.argmax)
+    @derived_from(np.ndarray)
     def argmax(self, axis=None, split_every=None, out=None):
         from .reductions import argmax
         return argmax(self, axis=axis, split_every=split_every, out=out)
 
-    @wraps(np.sum)
+    @derived_from(np.ndarray)
     def sum(self, axis=None, dtype=None, keepdims=False, split_every=None,
             out=None):
         from .reductions import sum
         return sum(self, axis=axis, dtype=dtype, keepdims=keepdims,
                    split_every=split_every, out=out)
 
-    @wraps(np.prod)
+    @derived_from(np.ndarray)
     def prod(self, axis=None, dtype=None, keepdims=False, split_every=None,
              out=None):
         from .reductions import prod
         return prod(self, axis=axis, dtype=dtype, keepdims=keepdims,
                     split_every=split_every, out=out)
 
-    @wraps(np.mean)
+    @derived_from(np.ndarray)
     def mean(self, axis=None, dtype=None, keepdims=False, split_every=None,
              out=None):
         from .reductions import mean
         return mean(self, axis=axis, dtype=dtype, keepdims=keepdims,
                     split_every=split_every, out=out)
 
-    @wraps(np.std)
+    @derived_from(np.ndarray)
     def std(self, axis=None, dtype=None, keepdims=False, ddof=0,
             split_every=None, out=None):
         from .reductions import std
         return std(self, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof,
                    split_every=split_every, out=out)
 
-    @wraps(np.var)
+    @derived_from(np.ndarray)
     def var(self, axis=None, dtype=None, keepdims=False, ddof=0,
             split_every=None, out=None):
         from .reductions import var
@@ -1664,8 +1684,9 @@ class Array(Base):
         from .reductions import cumprod
         return cumprod(self, axis, dtype, out=out)
 
-    @wraps(squeeze)
+    @derived_from(np.ndarray)
     def squeeze(self):
+        from .routines import squeeze
         return squeeze(self)
 
     def rechunk(self, chunks, threshold=None, block_size_limit=None):
@@ -1687,7 +1708,7 @@ class Array(Base):
         from .ufunc import conj
         return conj(self)
 
-    @wraps(np.clip)
+    @derived_from(np.ndarray)
     def clip(self, min=None, max=None):
         from .ufunc import clip
         return clip(self, min, max)
@@ -1731,12 +1752,14 @@ class Array(Base):
         out._chunks = chunks
         return out
 
-    @wraps(np.swapaxes)
+    @derived_from(np.ndarray)
     def swapaxes(self, axis1, axis2):
+        from .routines import swapaxes
         return swapaxes(self, axis1, axis2)
 
-    @wraps(np.round)
+    @derived_from(np.ndarray)
     def round(self, decimals=0):
+        from .routines import round
         return round(self, decimals=decimals)
 
     def copy(self):
@@ -1764,10 +1787,16 @@ class Array(Base):
         L = ndeepmap(self.ndim, lambda k: Delayed(k, dsk), self._keys())
         return np.array(L, dtype=object)
 
-    @wraps(np.repeat)
+    @derived_from(np.ndarray)
     def repeat(self, repeats, axis=None):
+        from .creation import repeat
         return repeat(self, repeats, axis=axis)
 
+    @derived_from(np.ndarray)
+    def nonzero(self):
+        from .routines import nonzero
+        return nonzero(self)
+
 
 def ensure_int(f):
     i = int(f)
... 11559 lines suppressed ...

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/dask.git


