[Python-modules-commits] [dask] 01/08: New upstream version 0.14.3

Ghislain Vaillant ghisvail-guest@moszumanska.debian.org
Thu Jun 1 19:52:17 UTC 2017


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository dask.

commit ce18bbf5aaf2276d6d5c7657b83148a8a3676461
Author: Ghislain Antony Vaillant <ghisvail@gmail.com>
Date:   Thu Jun 1 19:15:28 2017 +0100

    New upstream version 0.14.3
---
 .coveragerc                                        |   1 +
 .travis.yml                                        |  24 +-
 appveyor.yml                                       |   2 +-
 .../appveyor/setup_conda_environment.cmd           |   9 +-
 continuous_integration/travis/install.sh           |  55 ++-
 dask/array/__init__.py                             |   4 +-
 dask/array/conftest.py                             |   5 +-
 dask/array/core.py                                 | 238 +++++++++---
 dask/array/creation.py                             |  63 +++-
 dask/array/fft.py                                  | 124 +++++--
 dask/array/ghost.py                                |   5 +-
 dask/array/optimization.py                         |   4 +-
 dask/array/rechunk.py                              |  78 +++-
 dask/array/reshape.py                              |   8 +-
 dask/array/tests/test_array_core.py                | 260 +++++++++++--
 dask/array/tests/test_creation.py                  |  54 +++
 dask/array/tests/test_fft.py                       | 212 ++++++-----
 dask/array/tests/test_ghost.py                     |  15 +
 dask/array/tests/test_linalg.py                    |   2 +
 dask/array/tests/test_optimization.py              |  13 +
 dask/array/tests/test_rechunk.py                   | 193 +++++++++-
 dask/array/tests/test_reductions.py                |   1 +
 dask/array/tests/test_slicing.py                   |   1 +
 dask/array/tests/test_sparse.py                    | 130 +++++++
 dask/array/utils.py                                |  38 +-
 dask/async.py                                      |  29 +-
 dask/bag/core.py                                   |  32 +-
 dask/bag/tests/test_bag.py                         |  68 ++--
 dask/base.py                                       |  63 ++--
 dask/bytes/core.py                                 |   2 +-
 dask/bytes/local.py                                |  26 +-
 dask/bytes/s3.py                                   |  26 +-
 dask/bytes/tests/test_local.py                     |  41 +-
 dask/context.py                                    |  45 +++
 dask/core.py                                       |  22 +-
 dask/dataframe/__init__.py                         |   4 +-
 dask/dataframe/core.py                             | 207 ++++++++---
 dask/dataframe/groupby.py                          |  40 +-
 dask/dataframe/hashing.py                          |  27 +-
 dask/dataframe/io/__init__.py                      |   1 +
 dask/dataframe/io/csv.py                           |  47 ++-
 dask/dataframe/io/parquet.py                       | 267 +++++++++----
 dask/dataframe/io/sql.py                           | 131 +++++++
 dask/dataframe/io/tests/test_csv.py                |  48 +++
 dask/dataframe/io/tests/test_io.py                 |   6 +-
 dask/dataframe/io/tests/test_parquet.py            | 110 ++++--
 dask/dataframe/io/tests/test_sql.py                | 148 ++++++++
 dask/dataframe/methods.py                          |  27 +-
 dask/dataframe/multi.py                            |   4 +-
 dask/dataframe/optimize.py                         |   9 +-
 dask/dataframe/reshape.py                          |   1 +
 dask/dataframe/rolling.py                          | 152 +++++++-
 dask/dataframe/shuffle.py                          |  42 ++-
 dask/dataframe/tests/test_dataframe.py             | 413 ++++++++-------------
 dask/dataframe/tests/test_format.py                |  41 +-
 dask/dataframe/tests/test_groupby.py               |  56 ++-
 dask/dataframe/tests/test_hashing.py               |   9 +
 dask/dataframe/tests/test_indexing.py              |   8 +-
 dask/dataframe/tests/test_multi.py                 |   6 +-
 dask/dataframe/tests/test_optimize_dataframe.py    |   2 +-
 dask/dataframe/tests/test_reshape.py               |  24 +-
 dask/dataframe/tests/test_rolling.py               |  92 ++++-
 dask/dataframe/tests/test_shuffle.py               | 297 ++++++++++++++-
 dask/dataframe/tseries/resample.py                 |   7 +-
 dask/dataframe/utils.py                            |  43 ++-
 dask/delayed.py                                    |  58 ++-
 dask/diagnostics/profile.py                        |  15 +-
 dask/distributed.py                                |  13 +-
 dask/optimize.py                                   |  48 +--
 dask/tests/test_base.py                            |  45 +++
 dask/tests/test_context.py                         |  22 +-
 dask/tests/test_delayed.py                         |  41 ++
 dask/tests/test_distributed.py                     |  42 ++-
 dask/tests/test_optimize.py                        |  34 +-
 dask/tests/test_threaded.py                        |  44 ++-
 dask/tests/test_utils.py                           |  13 +-
 dask/utils.py                                      |  24 +-
 docs/requirements-docs.txt                         |   1 -
 docs/source/array-api.rst                          |  57 ++-
 docs/source/array-creation.rst                     |  71 ++++
 docs/source/array-sparse.rst                       |  80 ++++
 docs/source/array.rst                              |   1 +
 docs/source/bag-creation.rst                       |   5 +-
 docs/source/changelog.rst                          |  48 +++
 docs/source/conf.py                                |   2 +-
 docs/source/dataframe-api.rst                      |  43 ++-
 docs/source/dataframe-create.rst                   |  15 +-
 docs/source/dataframe-performance.rst              |  39 +-
 docs/source/debugging.rst                          | 238 ++++++++++++
 docs/source/index.rst                              |   3 +
 docs/source/machine-learning.rst                   | 111 ++++++
 docs/source/optimize.rst                           |  39 ++
 docs/source/scheduler-choice.rst                   |  32 +-
 93 files changed, 4333 insertions(+), 1013 deletions(-)

diff --git a/.coveragerc b/.coveragerc
index be2863c..6b7b4ba 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -2,5 +2,6 @@
 omit =
     */test_*.py
     dask/compatibility.py
+    dask/_version.py
 source = 
     dask
diff --git a/.travis.yml b/.travis.yml
index 4e24150..911d6f7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,19 +1,25 @@
-language: python
+language: generic
 sudo: false
+os: linux
 
 env:
   matrix:
     - PYTHON=2.7 NUMPY=1.10.4 PANDAS=0.19.0 COVERAGE='true' PARALLEL='false' TEST_IMPORTS='false' XTRATESTARGS=
-    - PYTHON=2.7 NUMPY=1.11.0 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
+    - PYTHON=2.7 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
     - PYTHON=3.4 NUMPY=1.10.4 PANDAS=0.19.1 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
-    - PYTHON=3.5 NUMPY=1.11.0 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' XTRATESTARGS=
-    - PYTHON=3.6 NUMPY=1.11.2 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
+    - PYTHON=3.5 NUMPY=1.11.3 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' XTRATESTARGS=
+    - PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
 
-addons:
-    apt:
-        packages:
-        - graphviz
-        - liblzma-dev
+matrix:
+  fast_finish: true
+  include:
+  - os: osx
+    env: PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
+  # Together with fast_finish, allow build to be marked successful before the OS X job finishes
+  allow_failures:
+  - os: osx
+    # This needs to be the exact same line as above
+    env: PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
 
 install: source continuous_integration/travis/install.sh
 script:
diff --git a/appveyor.yml b/appveyor.yml
index b0837c3..55f9c62 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -12,7 +12,7 @@ environment:
     # Since appveyor is quite slow, we only use a single configuration
     - PYTHON: "3.5"
       ARCH: "64"
-      NUMPY: "1.11"
+      NUMPY: "1.12.1"
       PANDAS: "0.19.2"
       CONDA_ENV: testenv
 
diff --git a/continuous_integration/appveyor/setup_conda_environment.cmd b/continuous_integration/appveyor/setup_conda_environment.cmd
index fd4623e..9773097 100644
--- a/continuous_integration/appveyor/setup_conda_environment.cmd
+++ b/continuous_integration/appveyor/setup_conda_environment.cmd
@@ -19,13 +19,20 @@ conda create -n %CONDA_ENV% -q -y python=%PYTHON% pytest toolz
 
 call activate %CONDA_ENV%
 
+@rem Pin matrix items
+@rem Please see PR ( https://github.com/dask/dask/pull/2185 ) for details.
+copy NUL %CONDA_PREFIX%\conda-meta\pinned
+echo numpy %NUMPY% >> %CONDA_PREFIX%\conda-meta\pinned
+echo pandas %PANDAS% >> %CONDA_PREFIX%\conda-meta\pinned
+
 @rem Install optional dependencies for tests
-%CONDA_INSTALL% numpy=%NUMPY% pandas=%PANDAS% cloudpickle distributed
+%CONDA_INSTALL% numpy pandas cloudpickle distributed
 %CONDA_INSTALL% s3fs psutil pytables bokeh bcolz scipy h5py ipython
 
 %PIP_INSTALL% git+https://github.com/dask/partd --upgrade
 %PIP_INSTALL% git+https://github.com/dask/cachey --upgrade
 %PIP_INSTALL% git+https://github.com/dask/distributed --upgrade
+%PIP_INSTALL% git+https://github.com/mrocklin/sparse --upgrade
 %PIP_INSTALL% blosc --upgrade
 %PIP_INSTALL% moto
 
diff --git a/continuous_integration/travis/install.sh b/continuous_integration/travis/install.sh
index 901e74a..37aedcf 100644
--- a/continuous_integration/travis/install.sh
+++ b/continuous_integration/travis/install.sh
@@ -1,66 +1,87 @@
 # Install conda
-wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
+case "$(uname -s)" in
+    'Darwin')
+        MINICONDA_FILENAME="Miniconda3-latest-MacOSX-x86_64.sh"
+        ;;
+    'Linux')
+        MINICONDA_FILENAME="Miniconda3-latest-Linux-x86_64.sh"
+        ;;
+    *)  ;;
+esac
+
+
+wget https://repo.continuum.io/miniconda/$MINICONDA_FILENAME -O miniconda.sh
 bash miniconda.sh -b -p $HOME/miniconda
 export PATH="$HOME/miniconda/bin:$PATH"
 conda config --set always_yes yes --set changeps1 no
 
 # Create conda environment
-conda create -n test-environment python=$PYTHON
+conda create -q -n test-environment python=$PYTHON
 source activate test-environment
 
+# Pin matrix items
+# Please see PR ( https://github.com/dask/dask/pull/2185 ) for details.
+touch $CONDA_PREFIX/conda-meta/pinned
+echo "numpy $NUMPY" >> $CONDA_PREFIX/conda-meta/pinned
+echo "pandas $PANDAS" >> $CONDA_PREFIX/conda-meta/pinned
+
 # Install dependencies.
 # XXX: Due to a weird conda dependency resolution issue, we need to install
 # dependencies in two separate calls, otherwise we sometimes get version
 # incompatible with the installed version of numpy leading to crashes. This
 # seems to have to do with differences between conda-forge and defaults.
-conda install -c conda-forge \
-    numpy=$NUMPY \
-    pandas=$PANDAS \
+conda install -q -c conda-forge \
+    numpy \
+    pandas \
     bcolz \
     blosc \
+    bokeh \
     chest \
     coverage \
     cytoolz \
+    graphviz \
     h5py \
     ipython \
     partd \
     psutil \
+    pyarrow \
     pytables \
     pytest \
+    scikit-image \
     scikit-learn \
     scipy \
+    sqlalchemy \
     toolz
 
 # Specify numpy/pandas here to prevent upgrade/downgrade
-conda install -c conda-forge \
-    numpy=$NUMPY \
-    pandas=$PANDAS \
+conda install -q -c conda-forge \
     distributed \
     cloudpickle \
-    bokeh \
 
-pip install git+https://github.com/dask/zict --upgrade --no-deps
-pip install git+https://github.com/dask/distributed --upgrade --no-deps
+pip install -q git+https://github.com/dask/partd --upgrade --no-deps
+pip install -q git+https://github.com/dask/zict --upgrade --no-deps
+pip install -q git+https://github.com/dask/distributed --upgrade --no-deps
+pip install -q git+https://github.com/mrocklin/sparse --upgrade --no-deps
 
 if [[ $PYTHONOPTIMIZE != '2' ]]; then
-    conda install -c conda-forge numba cython
-    pip install git+https://github.com/dask/fastparquet
+    conda install -q -c conda-forge numba cython
+    pip install -q git+https://github.com/dask/fastparquet
 fi
 
 if [[ $PYTHON == '2.7' ]]; then
-    pip install backports.lzma mock
+    pip install -q backports.lzma mock
 fi
 
-pip install \
+pip install -q \
     cachey \
     graphviz \
     moto \
     --upgrade --no-deps
 
-pip install \
+pip install -q \
     flake8 \
     pandas_datareader \
     pytest-xdist
 
 # Install dask
-pip install --no-deps -e .[complete]
+pip install -q --no-deps -e .[complete]
diff --git a/dask/array/__init__.py b/dask/array/__init__.py
index 5ad784b..22270ac 100644
--- a/dask/array/__init__.py
+++ b/dask/array/__init__.py
@@ -3,7 +3,7 @@ from __future__ import absolute_import, division, print_function
 from ..utils import ignoring
 from .core import (Array, stack, concatenate, take, tensordot, transpose,
         from_array, choose, where, coarsen, insert, broadcast_to, ravel,
-        fromfunction, unique, store, squeeze, topk, bincount,
+        roll, fromfunction, unique, store, squeeze, topk, bincount, tile,
         digitize, histogram, map_blocks, atop, to_hdf5, dot, cov, array,
         dstack, vstack, hstack, to_npy_stack, from_npy_stack, compress,
         from_delayed, round, swapaxes, repeat, asarray)
@@ -32,4 +32,4 @@ from .rechunk import rechunk
 from ..context import set_options
 from ..base import compute
 from .optimization import optimize
-from .creation import arange, linspace
+from .creation import arange, linspace, indices
diff --git a/dask/array/conftest.py b/dask/array/conftest.py
index e550319..25f0d1b 100644
--- a/dask/array/conftest.py
+++ b/dask/array/conftest.py
@@ -1,3 +1,6 @@
+import os
+
+
 def pytest_ignore_collect(path, config):
-    if 'fft.py' in str(path):
+    if os.path.split(str(path))[1].startswith("fft.py"):
         return True
diff --git a/dask/array/core.py b/dask/array/core.py
index c70c26f..d9d47f4 100644
--- a/dask/array/core.py
+++ b/dask/array/core.py
@@ -6,7 +6,7 @@ from collections import Iterator
 from functools import partial, wraps
 import inspect
 from itertools import product
-from numbers import Number
+from numbers import Integral, Number
 import operator
 from operator import add, getitem, mul
 import os
@@ -17,7 +17,6 @@ from threading import Lock
 import uuid
 import warnings
 
-import toolz
 try:
     from cytoolz.curried import (partition, concat, pluck, join, first,
                                  groupby, valmap, accumulate, interleave,
@@ -34,9 +33,10 @@ from . import chunk
 from .slicing import slice_array
 from . import numpy_compat
 from ..base import Base, tokenize, normalize_token
+from ..context import _globals
 from ..utils import (homogeneous_deepmap, ndeepmap, ignoring, concrete,
                      is_integer, IndexCallable, funcname, derived_from,
-                     SerializableLock, ensure_dict)
+                     SerializableLock, ensure_dict, package_of)
 from ..compatibility import unicode, long, getargspec, zip_longest, apply
 from ..delayed import to_task_dask
 from .. import threaded, core
@@ -102,7 +102,7 @@ def slices_from_chunks(chunks):
             for start, shape in zip(starts, shapes)]
 
 
-def getem(arr, chunks, shape=None, out_name=None, fancy=True, lock=False):
+def getem(arr, chunks, getitem=getarray, shape=None, out_name=None, lock=False):
     """ Dask getting various chunks from an array-like
 
     >>> getem('X', chunks=(2, 3), shape=(4, 6))  # doctest: +SKIP
@@ -122,12 +122,11 @@ def getem(arr, chunks, shape=None, out_name=None, fancy=True, lock=False):
 
     keys = list(product([out_name], *[range(len(bds)) for bds in chunks]))
     slices = slices_from_chunks(chunks)
-    getter = getarray if fancy else getarray_nofancy
 
     if lock:
-        values = [(getter, arr, x, lock) for x in slices]
+        values = [(getitem, arr, x, lock) for x in slices]
     else:
-        values = [(getter, arr, x) for x in slices]
+        values = [(getitem, arr, x) for x in slices]
 
     return dict(zip(keys, values))
 
@@ -466,7 +465,8 @@ def _concatenate2(arrays, axes=[]):
         return arrays
     if len(axes) > 1:
         arrays = [_concatenate2(a, axes=axes[1:]) for a in arrays]
-    return np.concatenate(arrays, axis=axes[0])
+    module = package_of(type(max(arrays, key=lambda x: x.__array_priority__))) or np
+    return module.concatenate(arrays, axis=axes[0])
 
 
 def apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype=True):
@@ -513,7 +513,8 @@ def map_blocks(func, *args, **kwargs):
     drop_axis : number or iterable, optional
         Dimensions lost by the function.
     new_axis : number or iterable, optional
-        New dimensions created by the function.
+        New dimensions created by the function. Note that these are applied
+        after ``drop_axis`` (if present).
     name : string, optional
         The key name to use for the array. If not provided, will be determined
         by a hash of the arguments.
@@ -610,9 +611,6 @@ def map_blocks(func, *args, **kwargs):
     if isinstance(new_axis, Number):
         new_axis = [new_axis]
 
-    if drop_axis and new_axis:
-        raise ValueError("Can't specify drop_axis and new_axis together")
-
     arrs = [a for a in args if isinstance(a, Array)]
     other = [(i, a) for i, a in enumerate(args) if not isinstance(a, Array)]
 
@@ -665,15 +663,22 @@ def map_blocks(func, *args, **kwargs):
                           if i - 1 not in drop_axis), v)
                    for k, v in dsk.items())
         numblocks = [n for i, n in enumerate(numblocks) if i not in drop_axis]
-    elif new_axis:
+    if new_axis:
+        new_axis = sorted(new_axis)
+        for i in new_axis:
+            if not 0 <= i <= len(numblocks):
+                ndim = len(numblocks)
+                raise ValueError("Can't add axis %d when current "
+                                 "axis are %r. Missing axis: "
+                                 "%r" % (i, list(range(ndim)),
+                                         list(range(ndim, i))))
+            numblocks.insert(i, 1)
         dsk, old_dsk = dict(), dsk
         for key in old_dsk:
             new_key = list(key)
             for i in new_axis:
                 new_key.insert(i + 1, 0)
             dsk[tuple(new_key)] = old_dsk[key]
-        for i in sorted(new_axis):
-            numblocks.insert(i, 1)
 
     if chunks:
         if len(chunks) != len(numblocks):
@@ -701,7 +706,7 @@ def map_blocks(func, *args, **kwargs):
                                  "`chunks` kwarg.")
         if drop_axis:
             chunks2 = [c for (i, c) in enumerate(chunks2) if i not in drop_axis]
-        elif new_axis:
+        if new_axis:
             for i in sorted(new_axis):
                 chunks2.insert(i, (1,))
 
@@ -868,11 +873,25 @@ def store(sources, targets, lock=True, regions=None, compute=True, **kwargs):
         raise ValueError("Different number of sources [%d] and targets [%d] than regions [%d]"
                          % (len(sources), len(targets), len(regions)))
 
-    updates = [insert_to_ooc(tgt, src, lock=lock, region=reg)
-               for tgt, src, reg in zip(targets, sources, regions)]
-    keys = [key for u in updates for key in u]
+    updates = {}
+    keys = []
+    for tgt, src, reg in zip(targets, sources, regions):
+        # if out is a delayed object update dictionary accordingly
+        try:
+            dsk = {}
+            dsk.update(tgt.dask)
+            tgt = tgt.key
+        except AttributeError:
+            dsk = {}
+
+        update = insert_to_ooc(tgt, src, lock=lock, region=reg)
+        keys.extend(update)
+
+        update.update(dsk)
+        updates.update(update)
+
     name = 'store-' + tokenize(*keys)
-    dsk = sharedict.merge((name, toolz.merge(updates)), *[src.dask for src in sources])
+    dsk = sharedict.merge((name, updates), *[src.dask for src in sources])
     if compute:
         Array._get(dsk, keys, **kwargs)
     else:
@@ -949,7 +968,8 @@ class Array(Base):
     _default_get = staticmethod(threaded.get)
     _finalize = staticmethod(finalize)
 
-    def __init__(self, dask, name, chunks, dtype, shape=None):
+    def __new__(cls, dask, name, chunks, dtype, shape=None):
+        self = super(Array, cls).__new__(cls)
         assert isinstance(dask, Mapping)
         if not isinstance(dask, ShareDict):
             s = ShareDict()
@@ -964,15 +984,15 @@ class Array(Base):
             raise ValueError("You must specify the dtype of the array")
         self.dtype = np.dtype(dtype)
 
-    @property
-    def _args(self):
-        return (self.dask, self.name, self.chunks, self.dtype)
+        for plugin in _globals.get('array_plugins', ()):
+            result = plugin(self)
+            if result is not None:
+                self = result
 
-    def __getstate__(self):
-        return self._args
+        return self
 
-    def __setstate__(self, state):
-        self.dask, self.name, self._chunks, self.dtype = state
+    def __reduce__(self):
+        return (Array, (self.dask, self.name, self.chunks, self.dtype))
 
     @property
     def numblocks(self):
@@ -1770,10 +1790,11 @@ def normalize_chunks(chunks, shape=None):
     """
     if chunks is None:
         raise ValueError(chunks_none_error_message)
-    if isinstance(chunks, list):
-        chunks = tuple(chunks)
-    if isinstance(chunks, Number):
-        chunks = (chunks,) * len(shape)
+    if type(chunks) is not tuple:
+        if type(chunks) is list:
+            chunks = tuple(chunks)
+        if isinstance(chunks, Number):
+            chunks = (chunks,) * len(shape)
     if not chunks and shape and all(s == 0 for s in shape):
         chunks = ((),) * len(shape)
 
@@ -1794,7 +1815,7 @@ def normalize_chunks(chunks, shape=None):
     return tuple(map(tuple, chunks))
 
 
-def from_array(x, chunks, name=None, lock=False, fancy=True):
+def from_array(x, chunks, name=None, lock=False, fancy=True, getitem=None):
     """ Create dask array from something that looks like an array
 
     Input must have a ``.shape`` and support numpy-style slicing.
@@ -1848,7 +1869,13 @@ def from_array(x, chunks, name=None, lock=False, fancy=True):
         original_name = name
     if lock is True:
         lock = SerializableLock()
-    dsk = getem(original_name, chunks, out_name=name, fancy=fancy, lock=lock)
+
+    if getitem is None:
+        if fancy:
+            getitem = getarray
+        else:
+            getitem = getarray_nofancy
+    dsk = getem(original_name, chunks, getitem, out_name=name, lock=lock)
     dsk[original_name] = x
     return Array(dsk, name, chunks, dtype=x.dtype)
 
@@ -2149,7 +2176,7 @@ def atop(func, out_ind, *args, **kwargs):
     argindsstr = list(concat([(a.name, ind) for a, ind in arginds]))
     # Finish up the name
     if not out:
-        out = '%s-%s' % (token or funcname(func),
+        out = '%s-%s' % (token or funcname(func).strip('_'),
                          tokenize(func, out_ind, argindsstr, dtype, **kwargs))
 
     dsk = top(func, out, out_ind, *argindsstr, numblocks=numblocks, **kwargs)
@@ -2239,6 +2266,9 @@ def stack(seq, axis=0):
     uc_args = list(concat((x, ind) for x in seq))
     _, seq = unify_chunks(*uc_args)
 
+    dt = reduce(np.promote_types, [a.dtype for a in seq])
+    seq = [x.astype(dt) for x in seq]
+
     assert len(set(a.chunks for a in seq)) == 1  # same chunks
     chunks = (seq[0].chunks[:axis] + ((1,) * n,) + seq[0].chunks[axis:])
 
@@ -2255,18 +2285,27 @@ def stack(seq, axis=0):
     dsk = dict(zip(keys, values))
     dsk2 = sharedict.merge((name, dsk), *[a.dask for a in seq])
 
-    dt = reduce(np.promote_types, [a.dtype for a in seq])
-
     return Array(dsk2, name, chunks, dtype=dt)
 
 
-def concatenate(seq, axis=0):
+def concatenate(seq, axis=0, allow_unknown_chunksizes=False):
     """
     Concatenate arrays along an existing axis
 
     Given a sequence of dask Arrays form a new dask Array by stacking them
     along an existing dimension (axis=0 by default)
 
+    Parameters
+    ----------
+    seq: list of dask.arrays
+    axis: int
+        Dimension along which to align all of the arrays
+    allow_unknown_chunksizes: bool
+        Allow unknown chunksizes, such as come from converting from dask
+        dataframes.  Dask.array is unable to verify that chunks line up.  If
+        data comes from differently aligned sources then this can cause
+        unexpected results.
+
     Examples
     --------
 
@@ -2299,8 +2338,14 @@ def concatenate(seq, axis=0):
         msg = ("Axis must be less than than number of dimensions"
                "\nData has %d dimensions, but got axis=%d")
         raise ValueError(msg % (ndim, axis))
-    if not all(i == axis or all(x.shape[i] == seq[0].shape[i] for x in seq)
-               for i in range(ndim)):
+    if (not allow_unknown_chunksizes and
+        not all(i == axis or all(x.shape[i] == seq[0].shape[i] for x in seq)
+                for i in range(ndim))):
+        if any(map(np.isnan, seq[0].shape)):
+            raise ValueError("Tried to concatenate arrays with unknown"
+                             " shape %s.  To force concatenation pass"
+                             " allow_unknown_chunksizes=True."
+                             % str(seq[0].shape))
         raise ValueError("Shapes do not align: %s", [x.shape for x in seq])
 
     inds = [list(range(ndim)) for i in range(n)]
@@ -2425,15 +2470,25 @@ def transpose(a, axes=None):
     else:
         axes = tuple(range(a.ndim))[::-1]
     axes = tuple(d + a.ndim if d < 0 else d for d in axes)
-    return atop(partial(np.transpose, axes=axes),
-                axes,
-                a, tuple(range(a.ndim)), dtype=a.dtype)
+    return atop(np.transpose, axes, a, tuple(range(a.ndim)),
+                dtype=a.dtype, axes=axes)
 
 
 alphabet = 'abcdefghijklmnopqrstuvwxyz'
 ALPHABET = alphabet.upper()
 
 
+def _tensordot(a, b, axes):
+    x = max([a, b], key=lambda x: x.__array_priority__)
+    module = package_of(type(x)) or np
+    x = module.tensordot(a, b, axes=axes)
+    ind = [slice(None, None)] * x.ndim
+    for a in sorted(axes[0]):
+        ind.insert(a, None)
+    x = x[tuple(ind)]
+    return x
+
+
 @wraps(np.tensordot)
 def tensordot(lhs, rhs, axes=2):
     if isinstance(axes, Iterable):
@@ -2451,10 +2506,6 @@ def tensordot(lhs, rhs, axes=2):
     if isinstance(right_axes, list):
         right_axes = tuple(right_axes)
 
-    if len(left_axes) > 1:
-        raise NotImplementedError("Simultaneous Contractions of multiple "
-                                  "indices not yet supported")
-
     dt = np.promote_types(lhs.dtype, rhs.dtype)
 
     left_index = list(alphabet[:lhs.ndim])
@@ -2465,16 +2516,13 @@ def tensordot(lhs, rhs, axes=2):
         out_index.remove(right_index[r])
         right_index[r] = left_index[l]
 
-    intermediate = atop(np.tensordot, out_index,
+    intermediate = atop(_tensordot, out_index,
                         lhs, left_index,
                         rhs, right_index, dtype=dt,
                         axes=(left_axes, right_axes))
 
-    int_index = list(out_index)
-    for l in left_axes:
-        out_index.remove(left_index[l])
-
-    return atop(sum, out_index, intermediate, int_index, dtype=dt)
+    result = intermediate.sum(axis=left_axes)
+    return result
 
 
 @wraps(np.dot)
@@ -2486,7 +2534,7 @@ def insert_to_ooc(out, arr, lock=True, region=None):
     if lock is True:
         lock = Lock()
 
-    def store(x, index, lock, region):
+    def store(out, x, index, lock, region):
         if lock:
             lock.acquire()
         try:
@@ -2503,8 +2551,9 @@ def insert_to_ooc(out, arr, lock=True, region=None):
     slices = slices_from_chunks(arr.chunks)
 
     name = 'store-%s' % arr.name
-    dsk = dict(((name,) + t[1:], (store, t, slc, lock, region))
+    dsk = dict(((name,) + t[1:], (store, out, t, slc, lock, region))
                for t, slc in zip(core.flatten(arr._keys()), slices))
+
     return dsk
 
 
@@ -2618,6 +2667,8 @@ def elemwise(op, *args, **kwargs):
         msg = "%s does not take the following keyword arguments %s"
         raise TypeError(msg % (op.__name__, str(sorted(set(kwargs) - set(['name', 'dtype'])))))
 
+    args = [np.asarray(a) if isinstance(a, (list, tuple)) else a for a in args]
+
     shapes = [getattr(arg, 'shape', ()) for arg in args]
     shapes = [s if isinstance(s, Iterable) else () for s in shapes]
     out_ndim = len(broadcast_shapes(*shapes))   # Raises ValueError if dimensions mismatch
@@ -2831,6 +2882,54 @@ def ravel(array):
     return array.reshape((-1,))
 
 
+@wraps(np.roll)
+def roll(array, shift, axis=None):
+    result = array
+
+    if axis is None:
+        result = ravel(result)
+
+        if not isinstance(shift, Integral):
+            raise TypeError(
+                "Expect `shift` to be an instance of Integral"
+                " when `axis` is None."
+            )
+
+        shift = (shift,)
+        axis = (0,)
+    else:
+        try:
+            len(shift)
+        except TypeError:
+            shift = (shift,)
+        try:
+            len(axis)
+        except TypeError:
+            axis = (axis,)
+
+    if len(shift) != len(axis):
+        raise ValueError("Must have the same number of shifts as axes.")
+
+    for i, s in zip(axis, shift):
+        s = -s
+        s %= result.shape[i]
+
+        sl1 = result.ndim * [slice(None)]
+        sl2 = result.ndim * [slice(None)]
+
+        sl1[i] = slice(s, None)
+        sl2[i] = slice(None, s)
+
+        sl1 = tuple(sl1)
+        sl2 = tuple(sl2)
+
+        result = concatenate([result[sl1], result[sl2]], axis=i)
+
+    result = result.reshape(array.shape)
+
+    return result
+
+
 def offset_func(func, offset, *args):
     """  Offsets inputs by offset
 
@@ -3329,11 +3428,19 @@ def concatenate3(arrays):
            [1, 2, 1, 2]])
     """
     arrays = concrete(arrays)
+    if not arrays:
+        return np.empty(0)
+
+    advanced = max(core.flatten(arrays, container=(list, tuple)),
+                   key=lambda x: getattr(x, '__array_priority__', 0))
+    module = package_of(type(advanced)) or np
+    if module is not np and hasattr(module, 'concatenate'):
+        x = unpack_singleton(arrays)
+        return _concatenate2(arrays, axes=list(range(x.ndim)))
+
     ndim = ndimlist(arrays)
     if not ndim:
         return arrays
-    if not arrays:
-        return np.empty(0)
     chunks = chunks_from_arrays(arrays)
     shape = tuple(map(sum, chunks))
 
@@ -3749,7 +3856,7 @@ def swapaxes(a, axis1, axis2):
                 dtype=a.dtype)
 
 
-@wraps(np.dot)
+@wraps(np.repeat)
 def repeat(a, repeats, axis=None):
     if axis is None:
         if a.ndim == 1:
@@ -3790,6 +3897,21 @@ def repeat(a, repeats, axis=None):
     return concatenate(out, axis=axis)
 
 
+@wraps(np.tile)
+def tile(A, reps):
+    if not isinstance(reps, Integral):
+        raise NotImplementedError("Only integer valued `reps` supported.")
+
+    if reps < 0:
+        raise ValueError("Negative `reps` are not allowed.")
+    elif reps == 0:
+        return A[..., :0]
+    elif reps == 1:
+        return A
+
+    return concatenate(reps * [A], axis=-1)
+
+
 def slice_with_dask_array(x, index):
     y = elemwise(getitem, x, index, dtype=x.dtype)
 
diff --git a/dask/array/creation.py b/dask/array/creation.py
index 1041e77..095dcef 100644
--- a/dask/array/creation.py
+++ b/dask/array/creation.py
@@ -1,10 +1,12 @@
 from __future__ import absolute_import, division, print_function
 
 from functools import partial
+import itertools
 
 import numpy as np
 
-from .core import Array, normalize_chunks
+from .core import Array, normalize_chunks, stack
+from .wrap import empty
 from . import chunk
 from ..base import tokenize
 
@@ -135,3 +137,62 @@ def arange(*args, **kwargs):
         elem_count += bs
 
     return Array(dsk, name, chunks, dtype=dtype)
+
+
+def indices(dimensions, dtype=int, chunks=None):
+    """
+    Implements NumPy's ``indices`` for Dask Arrays.
+
+    Generates a grid of indices covering the dimensions provided.
+
+    The final array has the shape ``(len(dimensions), *dimensions)``. The
+    chunks are used to specify the chunking for axis 1 up to
+    ``len(dimensions)``. The 0th axis always has chunks of length 1.
+
+    Parameters
+    ----------
+    dimensions : sequence of ints
+        The shape of the index grid.
+    dtype : dtype, optional
+        Type to use for the array. Default is ``int``.
+    chunks : sequence of ints
+        The number of samples on each block. Note that the last block will have
+        fewer samples if ``len(array) % chunks != 0``.
+
+    Returns
+    -------
+    grid : dask array
+    """
+    if chunks is None:
+        raise ValueError("Must supply a chunks= keyword argument")
+
+    dimensions = tuple(dimensions)
+    dtype = np.dtype(dtype)
+    chunks = tuple(chunks)
+
+    if len(dimensions) != len(chunks):
+        raise ValueError("Need one more chunk than dimensions.")
+
+    grid = []
+    if np.prod(dimensions):
+        for i in range(len(dimensions)):
+            s = len(dimensions) * [None]
+            s[i] = slice(None)
+            s = tuple(s)
+
+            r = arange(dimensions[i], dtype=dtype, chunks=chunks[i])
+            r = r[s]
+
+            for j in itertools.chain(range(i), range(i + 1, len(dimensions))):
+                r = r.repeat(dimensions[j], axis=j)
+
+            grid.append(r)
+
+    if grid:
+        grid = stack(grid)
+    else:
+        grid = empty(
+            (len(dimensions),) + dimensions, dtype=dtype, chunks=(1,) + chunks
+        )
+
+    return grid
diff --git a/dask/array/fft.py b/dask/array/fft.py
index f0d63f8..cfe478f 100644
--- a/dask/array/fft.py
+++ b/dask/array/fft.py
@@ -27,42 +27,66 @@ fft_preamble = """
     """
 
 
-def _fft_out_chunks(a, n, axis):
-    """ For computing the output chunks of fft and ifft"""
-    if n is None:
+def _fft_out_chunks(a, s, axes):
+    """ For computing the output chunks of [i]fft*"""
+    if s is None:
         return a.chunks
     chunks = list(a.chunks)
-    chunks[axis] = (n,)
+    for i, axis in enumerate(axes):
+        chunks[axis] = (s[i],)
     return chunks
 
 
-def _rfft_out_chunks(a, n, axis):
-    if n is None:
-        n = a.chunks[axis][0]
+def _rfft_out_chunks(a, s, axes):
+    """ For computing the output chunks of rfft*"""
+    if s is None:
+        s = [a.chunks[axis][0] for axis in axes]
+    s = list(s)
+    s[-1] = s[-1] // 2 + 1
     chunks = list(a.chunks)
-    chunks[axis] = (n // 2 + 1,)
+    for i, axis in enumerate(axes):
+        chunks[axis] = (s[i],)
     return chunks
 
 
-def _irfft_out_chunks(a, n, axis):
-    if n is None:
-        n = 2 * (a.chunks[axis][0] - 1)
+def _irfft_out_chunks(a, s, axes):
+    """ For computing the output chunks of irfft*"""
+    if s is None:
+        s = [a.chunks[axis][0] for axis in axes]
+        s[-1] = 2 * (s[-1] - 1)
     chunks = list(a.chunks)
-    chunks[axis] = (n,)
+    for i, axis in enumerate(axes):
+        chunks[axis] = (s[i],)
     return chunks
 
 
-def _hfft_out_chunks(a, n, axis):
-    if n is None:
-        n = 2 * (a.chunks[axis][0] - 1)
+def _hfft_out_chunks(a, s, axes):
+    assert len(axes) == 1
+
+    axis = axes[0]
+
+    if s is None:
+        s = [2 * (a.chunks[axis][0] - 1)]
+
+    n = s[0]
+
     chunks = list(a.chunks)
     chunks[axis] = (n,)
     return chunks
 
 
-def _ihfft_out_chunks(a, n, axis):
-    if n is None:
-        n = a.chunks[axis][0]
+def _ihfft_out_chunks(a, s, axes):
+    assert len(axes) == 1
+
+    axis = axes[0]
+
+    if s is None:
+        s = [a.chunks[axis][0]]
+    else:
+        assert len(s) == 1
+
+    n = s[0]
+
     chunks = list(a.chunks)
     if n % 2 == 0:
         m = (n // 2) + 1
@@ -110,24 +134,58 @@ def fft_wrap(fft_func, kind=None, dtype=None):
     if kind is None:
         kind = fft_func.__name__
     try:
-        out_chunk_fn = _out_chunk_fns[kind]
+        out_chunk_fn = _out_chunk_fns[kind.rstrip("2n")]
     except KeyError:
         raise ValueError("Given unknown `kind` %s." % kind)
 
-    _dtype = dtype
+    def func(a, s=None, axes=None):
... 7645 lines suppressed ...

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/dask.git


