[Python-modules-commits] [dask] 01/08: New upstream version 0.14.3
Ghislain Vaillant
ghisvail-guest@moszumanska.debian.org
Thu Jun 1 19:52:17 UTC 2017
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository dask.
commit ce18bbf5aaf2276d6d5c7657b83148a8a3676461
Author: Ghislain Antony Vaillant <ghisvail@gmail.com>
Date: Thu Jun 1 19:15:28 2017 +0100
New upstream version 0.14.3
---
.coveragerc | 1 +
.travis.yml | 24 +-
appveyor.yml | 2 +-
.../appveyor/setup_conda_environment.cmd | 9 +-
continuous_integration/travis/install.sh | 55 ++-
dask/array/__init__.py | 4 +-
dask/array/conftest.py | 5 +-
dask/array/core.py | 238 +++++++++---
dask/array/creation.py | 63 +++-
dask/array/fft.py | 124 +++++--
dask/array/ghost.py | 5 +-
dask/array/optimization.py | 4 +-
dask/array/rechunk.py | 78 +++-
dask/array/reshape.py | 8 +-
dask/array/tests/test_array_core.py | 260 +++++++++++--
dask/array/tests/test_creation.py | 54 +++
dask/array/tests/test_fft.py | 212 ++++++-----
dask/array/tests/test_ghost.py | 15 +
dask/array/tests/test_linalg.py | 2 +
dask/array/tests/test_optimization.py | 13 +
dask/array/tests/test_rechunk.py | 193 +++++++++-
dask/array/tests/test_reductions.py | 1 +
dask/array/tests/test_slicing.py | 1 +
dask/array/tests/test_sparse.py | 130 +++++++
dask/array/utils.py | 38 +-
dask/async.py | 29 +-
dask/bag/core.py | 32 +-
dask/bag/tests/test_bag.py | 68 ++--
dask/base.py | 63 ++--
dask/bytes/core.py | 2 +-
dask/bytes/local.py | 26 +-
dask/bytes/s3.py | 26 +-
dask/bytes/tests/test_local.py | 41 +-
dask/context.py | 45 +++
dask/core.py | 22 +-
dask/dataframe/__init__.py | 4 +-
dask/dataframe/core.py | 207 ++++++++---
dask/dataframe/groupby.py | 40 +-
dask/dataframe/hashing.py | 27 +-
dask/dataframe/io/__init__.py | 1 +
dask/dataframe/io/csv.py | 47 ++-
dask/dataframe/io/parquet.py | 267 +++++++++----
dask/dataframe/io/sql.py | 131 +++++++
dask/dataframe/io/tests/test_csv.py | 48 +++
dask/dataframe/io/tests/test_io.py | 6 +-
dask/dataframe/io/tests/test_parquet.py | 110 ++++--
dask/dataframe/io/tests/test_sql.py | 148 ++++++++
dask/dataframe/methods.py | 27 +-
dask/dataframe/multi.py | 4 +-
dask/dataframe/optimize.py | 9 +-
dask/dataframe/reshape.py | 1 +
dask/dataframe/rolling.py | 152 +++++++-
dask/dataframe/shuffle.py | 42 ++-
dask/dataframe/tests/test_dataframe.py | 413 ++++++++-------------
dask/dataframe/tests/test_format.py | 41 +-
dask/dataframe/tests/test_groupby.py | 56 ++-
dask/dataframe/tests/test_hashing.py | 9 +
dask/dataframe/tests/test_indexing.py | 8 +-
dask/dataframe/tests/test_multi.py | 6 +-
dask/dataframe/tests/test_optimize_dataframe.py | 2 +-
dask/dataframe/tests/test_reshape.py | 24 +-
dask/dataframe/tests/test_rolling.py | 92 ++++-
dask/dataframe/tests/test_shuffle.py | 297 ++++++++++++++-
dask/dataframe/tseries/resample.py | 7 +-
dask/dataframe/utils.py | 43 ++-
dask/delayed.py | 58 ++-
dask/diagnostics/profile.py | 15 +-
dask/distributed.py | 13 +-
dask/optimize.py | 48 +--
dask/tests/test_base.py | 45 +++
dask/tests/test_context.py | 22 +-
dask/tests/test_delayed.py | 41 ++
dask/tests/test_distributed.py | 42 ++-
dask/tests/test_optimize.py | 34 +-
dask/tests/test_threaded.py | 44 ++-
dask/tests/test_utils.py | 13 +-
dask/utils.py | 24 +-
docs/requirements-docs.txt | 1 -
docs/source/array-api.rst | 57 ++-
docs/source/array-creation.rst | 71 ++++
docs/source/array-sparse.rst | 80 ++++
docs/source/array.rst | 1 +
docs/source/bag-creation.rst | 5 +-
docs/source/changelog.rst | 48 +++
docs/source/conf.py | 2 +-
docs/source/dataframe-api.rst | 43 ++-
docs/source/dataframe-create.rst | 15 +-
docs/source/dataframe-performance.rst | 39 +-
docs/source/debugging.rst | 238 ++++++++++++
docs/source/index.rst | 3 +
docs/source/machine-learning.rst | 111 ++++++
docs/source/optimize.rst | 39 ++
docs/source/scheduler-choice.rst | 32 +-
93 files changed, 4333 insertions(+), 1013 deletions(-)
diff --git a/.coveragerc b/.coveragerc
index be2863c..6b7b4ba 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -2,5 +2,6 @@
omit =
*/test_*.py
dask/compatibility.py
+ dask/_version.py
source =
dask
diff --git a/.travis.yml b/.travis.yml
index 4e24150..911d6f7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,19 +1,25 @@
-language: python
+language: generic
sudo: false
+os: linux
env:
matrix:
- PYTHON=2.7 NUMPY=1.10.4 PANDAS=0.19.0 COVERAGE='true' PARALLEL='false' TEST_IMPORTS='false' XTRATESTARGS=
- - PYTHON=2.7 NUMPY=1.11.0 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
+ - PYTHON=2.7 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
- PYTHON=3.4 NUMPY=1.10.4 PANDAS=0.19.1 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
- - PYTHON=3.5 NUMPY=1.11.0 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' XTRATESTARGS=
- - PYTHON=3.6 NUMPY=1.11.2 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
+ - PYTHON=3.5 NUMPY=1.11.3 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' XTRATESTARGS=
+ - PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
-addons:
- apt:
- packages:
- - graphviz
- - liblzma-dev
+matrix:
+ fast_finish: true
+ include:
+ - os: osx
+ env: PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
+ # Together with fast_finish, allow build to be marked successful before the OS X job finishes
+ allow_failures:
+ - os: osx
+ # This needs to be the exact same line as above
+ env: PYTHON=3.6 NUMPY=1.12.1 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
install: source continuous_integration/travis/install.sh
script:
diff --git a/appveyor.yml b/appveyor.yml
index b0837c3..55f9c62 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -12,7 +12,7 @@ environment:
# Since appveyor is quite slow, we only use a single configuration
- PYTHON: "3.5"
ARCH: "64"
- NUMPY: "1.11"
+ NUMPY: "1.12.1"
PANDAS: "0.19.2"
CONDA_ENV: testenv
diff --git a/continuous_integration/appveyor/setup_conda_environment.cmd b/continuous_integration/appveyor/setup_conda_environment.cmd
index fd4623e..9773097 100644
--- a/continuous_integration/appveyor/setup_conda_environment.cmd
+++ b/continuous_integration/appveyor/setup_conda_environment.cmd
@@ -19,13 +19,20 @@ conda create -n %CONDA_ENV% -q -y python=%PYTHON% pytest toolz
call activate %CONDA_ENV%
+@rem Pin matrix items
+@rem Please see PR ( https://github.com/dask/dask/pull/2185 ) for details.
+copy NUL %CONDA_PREFIX%\conda-meta\pinned
+echo numpy %NUMPY% >> %CONDA_PREFIX%\conda-meta\pinned
+echo pandas %PANDAS% >> %CONDA_PREFIX%\conda-meta\pinned
+
@rem Install optional dependencies for tests
-%CONDA_INSTALL% numpy=%NUMPY% pandas=%PANDAS% cloudpickle distributed
+%CONDA_INSTALL% numpy pandas cloudpickle distributed
%CONDA_INSTALL% s3fs psutil pytables bokeh bcolz scipy h5py ipython
%PIP_INSTALL% git+https://github.com/dask/partd --upgrade
%PIP_INSTALL% git+https://github.com/dask/cachey --upgrade
%PIP_INSTALL% git+https://github.com/dask/distributed --upgrade
+%PIP_INSTALL% git+https://github.com/mrocklin/sparse --upgrade
%PIP_INSTALL% blosc --upgrade
%PIP_INSTALL% moto
diff --git a/continuous_integration/travis/install.sh b/continuous_integration/travis/install.sh
index 901e74a..37aedcf 100644
--- a/continuous_integration/travis/install.sh
+++ b/continuous_integration/travis/install.sh
@@ -1,66 +1,87 @@
# Install conda
-wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
+case "$(uname -s)" in
+ 'Darwin')
+ MINICONDA_FILENAME="Miniconda3-latest-MacOSX-x86_64.sh"
+ ;;
+ 'Linux')
+ MINICONDA_FILENAME="Miniconda3-latest-Linux-x86_64.sh"
+ ;;
+ *) ;;
+esac
+
+
+wget https://repo.continuum.io/miniconda/$MINICONDA_FILENAME -O miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
conda config --set always_yes yes --set changeps1 no
# Create conda environment
-conda create -n test-environment python=$PYTHON
+conda create -q -n test-environment python=$PYTHON
source activate test-environment
+# Pin matrix items
+# Please see PR ( https://github.com/dask/dask/pull/2185 ) for details.
+touch $CONDA_PREFIX/conda-meta/pinned
+echo "numpy $NUMPY" >> $CONDA_PREFIX/conda-meta/pinned
+echo "pandas $PANDAS" >> $CONDA_PREFIX/conda-meta/pinned
+
# Install dependencies.
# XXX: Due to a weird conda dependency resolution issue, we need to install
# dependencies in two separate calls, otherwise we sometimes get version
# incompatible with the installed version of numpy leading to crashes. This
# seems to have to do with differences between conda-forge and defaults.
-conda install -c conda-forge \
- numpy=$NUMPY \
- pandas=$PANDAS \
+conda install -q -c conda-forge \
+ numpy \
+ pandas \
bcolz \
blosc \
+ bokeh \
chest \
coverage \
cytoolz \
+ graphviz \
h5py \
ipython \
partd \
psutil \
+ pyarrow \
pytables \
pytest \
+ scikit-image \
scikit-learn \
scipy \
+ sqlalchemy \
toolz
# Specify numpy/pandas here to prevent upgrade/downgrade
-conda install -c conda-forge \
- numpy=$NUMPY \
- pandas=$PANDAS \
+conda install -q -c conda-forge \
distributed \
cloudpickle \
- bokeh \
-pip install git+https://github.com/dask/zict --upgrade --no-deps
-pip install git+https://github.com/dask/distributed --upgrade --no-deps
+pip install -q git+https://github.com/dask/partd --upgrade --no-deps
+pip install -q git+https://github.com/dask/zict --upgrade --no-deps
+pip install -q git+https://github.com/dask/distributed --upgrade --no-deps
+pip install -q git+https://github.com/mrocklin/sparse --upgrade --no-deps
if [[ $PYTHONOPTIMIZE != '2' ]]; then
- conda install -c conda-forge numba cython
- pip install git+https://github.com/dask/fastparquet
+ conda install -q -c conda-forge numba cython
+ pip install -q git+https://github.com/dask/fastparquet
fi
if [[ $PYTHON == '2.7' ]]; then
- pip install backports.lzma mock
+ pip install -q backports.lzma mock
fi
-pip install \
+pip install -q \
cachey \
graphviz \
moto \
--upgrade --no-deps
-pip install \
+pip install -q \
flake8 \
pandas_datareader \
pytest-xdist
# Install dask
-pip install --no-deps -e .[complete]
+pip install -q --no-deps -e .[complete]
diff --git a/dask/array/__init__.py b/dask/array/__init__.py
index 5ad784b..22270ac 100644
--- a/dask/array/__init__.py
+++ b/dask/array/__init__.py
@@ -3,7 +3,7 @@ from __future__ import absolute_import, division, print_function
from ..utils import ignoring
from .core import (Array, stack, concatenate, take, tensordot, transpose,
from_array, choose, where, coarsen, insert, broadcast_to, ravel,
- fromfunction, unique, store, squeeze, topk, bincount,
+ roll, fromfunction, unique, store, squeeze, topk, bincount, tile,
digitize, histogram, map_blocks, atop, to_hdf5, dot, cov, array,
dstack, vstack, hstack, to_npy_stack, from_npy_stack, compress,
from_delayed, round, swapaxes, repeat, asarray)
@@ -32,4 +32,4 @@ from .rechunk import rechunk
from ..context import set_options
from ..base import compute
from .optimization import optimize
-from .creation import arange, linspace
+from .creation import arange, linspace, indices
diff --git a/dask/array/conftest.py b/dask/array/conftest.py
index e550319..25f0d1b 100644
--- a/dask/array/conftest.py
+++ b/dask/array/conftest.py
@@ -1,3 +1,6 @@
+import os
+
+
def pytest_ignore_collect(path, config):
- if 'fft.py' in str(path):
+ if os.path.split(str(path))[1].startswith("fft.py"):
return True
diff --git a/dask/array/core.py b/dask/array/core.py
index c70c26f..d9d47f4 100644
--- a/dask/array/core.py
+++ b/dask/array/core.py
@@ -6,7 +6,7 @@ from collections import Iterator
from functools import partial, wraps
import inspect
from itertools import product
-from numbers import Number
+from numbers import Integral, Number
import operator
from operator import add, getitem, mul
import os
@@ -17,7 +17,6 @@ from threading import Lock
import uuid
import warnings
-import toolz
try:
from cytoolz.curried import (partition, concat, pluck, join, first,
groupby, valmap, accumulate, interleave,
@@ -34,9 +33,10 @@ from . import chunk
from .slicing import slice_array
from . import numpy_compat
from ..base import Base, tokenize, normalize_token
+from ..context import _globals
from ..utils import (homogeneous_deepmap, ndeepmap, ignoring, concrete,
is_integer, IndexCallable, funcname, derived_from,
- SerializableLock, ensure_dict)
+ SerializableLock, ensure_dict, package_of)
from ..compatibility import unicode, long, getargspec, zip_longest, apply
from ..delayed import to_task_dask
from .. import threaded, core
@@ -102,7 +102,7 @@ def slices_from_chunks(chunks):
for start, shape in zip(starts, shapes)]
-def getem(arr, chunks, shape=None, out_name=None, fancy=True, lock=False):
+def getem(arr, chunks, getitem=getarray, shape=None, out_name=None, lock=False):
""" Dask getting various chunks from an array-like
>>> getem('X', chunks=(2, 3), shape=(4, 6)) # doctest: +SKIP
@@ -122,12 +122,11 @@ def getem(arr, chunks, shape=None, out_name=None, fancy=True, lock=False):
keys = list(product([out_name], *[range(len(bds)) for bds in chunks]))
slices = slices_from_chunks(chunks)
- getter = getarray if fancy else getarray_nofancy
if lock:
- values = [(getter, arr, x, lock) for x in slices]
+ values = [(getitem, arr, x, lock) for x in slices]
else:
- values = [(getter, arr, x) for x in slices]
+ values = [(getitem, arr, x) for x in slices]
return dict(zip(keys, values))
@@ -466,7 +465,8 @@ def _concatenate2(arrays, axes=[]):
return arrays
if len(axes) > 1:
arrays = [_concatenate2(a, axes=axes[1:]) for a in arrays]
- return np.concatenate(arrays, axis=axes[0])
+ module = package_of(type(max(arrays, key=lambda x: x.__array_priority__))) or np
+ return module.concatenate(arrays, axis=axes[0])
def apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype=True):
@@ -513,7 +513,8 @@ def map_blocks(func, *args, **kwargs):
drop_axis : number or iterable, optional
Dimensions lost by the function.
new_axis : number or iterable, optional
- New dimensions created by the function.
+ New dimensions created by the function. Note that these are applied
+ after ``drop_axis`` (if present).
name : string, optional
The key name to use for the array. If not provided, will be determined
by a hash of the arguments.
@@ -610,9 +611,6 @@ def map_blocks(func, *args, **kwargs):
if isinstance(new_axis, Number):
new_axis = [new_axis]
- if drop_axis and new_axis:
- raise ValueError("Can't specify drop_axis and new_axis together")
-
arrs = [a for a in args if isinstance(a, Array)]
other = [(i, a) for i, a in enumerate(args) if not isinstance(a, Array)]
@@ -665,15 +663,22 @@ def map_blocks(func, *args, **kwargs):
if i - 1 not in drop_axis), v)
for k, v in dsk.items())
numblocks = [n for i, n in enumerate(numblocks) if i not in drop_axis]
- elif new_axis:
+ if new_axis:
+ new_axis = sorted(new_axis)
+ for i in new_axis:
+ if not 0 <= i <= len(numblocks):
+ ndim = len(numblocks)
+ raise ValueError("Can't add axis %d when current "
+ "axis are %r. Missing axis: "
+ "%r" % (i, list(range(ndim)),
+ list(range(ndim, i))))
+ numblocks.insert(i, 1)
dsk, old_dsk = dict(), dsk
for key in old_dsk:
new_key = list(key)
for i in new_axis:
new_key.insert(i + 1, 0)
dsk[tuple(new_key)] = old_dsk[key]
- for i in sorted(new_axis):
- numblocks.insert(i, 1)
if chunks:
if len(chunks) != len(numblocks):
@@ -701,7 +706,7 @@ def map_blocks(func, *args, **kwargs):
"`chunks` kwarg.")
if drop_axis:
chunks2 = [c for (i, c) in enumerate(chunks2) if i not in drop_axis]
- elif new_axis:
+ if new_axis:
for i in sorted(new_axis):
chunks2.insert(i, (1,))
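
With this change ``drop_axis`` and ``new_axis`` may be combined in a single
``map_blocks`` call, with ``new_axis`` applied after ``drop_axis``. A minimal
sketch of the new behaviour (the shapes and the lambda are illustrative, not
taken from this commit):

    import dask.array as da

    x = da.ones((6, 4), chunks=(3, 4))
    # Reduce each (3, 4) block along axis 1, then restore a length-1
    # trailing axis: drop_axis removes old axis 1, new_axis re-inserts it.
    y = x.map_blocks(lambda b: b.sum(axis=1)[:, None],
                     drop_axis=1, new_axis=1, dtype=x.dtype)
    assert y.compute().shape == (6, 1)
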
@@ -868,11 +873,25 @@ def store(sources, targets, lock=True, regions=None, compute=True, **kwargs):
raise ValueError("Different number of sources [%d] and targets [%d] than regions [%d]"
% (len(sources), len(targets), len(regions)))
- updates = [insert_to_ooc(tgt, src, lock=lock, region=reg)
- for tgt, src, reg in zip(targets, sources, regions)]
- keys = [key for u in updates for key in u]
+ updates = {}
+ keys = []
+ for tgt, src, reg in zip(targets, sources, regions):
+ # if out is a delayed object update dictionary accordingly
+ try:
+ dsk = {}
+ dsk.update(tgt.dask)
+ tgt = tgt.key
+ except AttributeError:
+ dsk = {}
+
+ update = insert_to_ooc(tgt, src, lock=lock, region=reg)
+ keys.extend(update)
+
+ update.update(dsk)
+ updates.update(update)
+
name = 'store-' + tokenize(*keys)
- dsk = sharedict.merge((name, toolz.merge(updates)), *[src.dask for src in sources])
+ dsk = sharedict.merge((name, updates), *[src.dask for src in sources])
if compute:
Array._get(dsk, keys, **kwargs)
else:
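
The reworked ``store`` unpacks targets that carry their own dask graph
(e.g. delayed objects) before building the final graph. Plain in-memory
targets keep working as before; a small sketch:

    import numpy as np
    import dask.array as da

    x = da.ones((4, 4), chunks=(2, 2))
    out = np.empty((4, 4))
    da.store(x, out)            # writes each chunk of x into the target
    assert (out == 1).all()
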
@@ -949,7 +968,8 @@ class Array(Base):
_default_get = staticmethod(threaded.get)
_finalize = staticmethod(finalize)
- def __init__(self, dask, name, chunks, dtype, shape=None):
+ def __new__(cls, dask, name, chunks, dtype, shape=None):
+ self = super(Array, cls).__new__(cls)
assert isinstance(dask, Mapping)
if not isinstance(dask, ShareDict):
s = ShareDict()
@@ -964,15 +984,15 @@ class Array(Base):
raise ValueError("You must specify the dtype of the array")
self.dtype = np.dtype(dtype)
- @property
- def _args(self):
- return (self.dask, self.name, self.chunks, self.dtype)
+ for plugin in _globals.get('array_plugins', ()):
+ result = plugin(self)
+ if result is not None:
+ self = result
- def __getstate__(self):
- return self._args
+ return self
- def __setstate__(self, state):
- self.dask, self.name, self._chunks, self.dtype = state
+ def __reduce__(self):
+ return (Array, (self.dask, self.name, self.chunks, self.dtype))
@property
def numblocks(self):
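
The constructor now consults an ``array_plugins`` entry in the global
options: each callback sees every freshly built Array (returning None keeps
it unchanged), and ``__reduce__`` replaces the old getstate/setstate pair for
pickling. A hedged sketch, assuming ``dask.set_options`` as shipped in this
release:

    import pickle
    import dask
    import dask.array as da

    def check_chunks(arr):
        # Runs for every newly constructed dask Array.
        assert isinstance(arr.chunks, tuple)

    with dask.set_options(array_plugins=[check_chunks]):
        x = da.ones(6, chunks=3)

    y = pickle.loads(pickle.dumps(x))   # round-trips via __reduce__
    assert (y.compute() == 1).all()
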
@@ -1770,10 +1790,11 @@ def normalize_chunks(chunks, shape=None):
"""
if chunks is None:
raise ValueError(chunks_none_error_message)
- if isinstance(chunks, list):
- chunks = tuple(chunks)
- if isinstance(chunks, Number):
- chunks = (chunks,) * len(shape)
+ if type(chunks) is not tuple:
+ if type(chunks) is list:
+ chunks = tuple(chunks)
+ if isinstance(chunks, Number):
+ chunks = (chunks,) * len(shape)
if not chunks and shape and all(s == 0 for s in shape):
chunks = ((),) * len(shape)
@@ -1794,7 +1815,7 @@ def normalize_chunks(chunks, shape=None):
return tuple(map(tuple, chunks))
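
For reference, ``normalize_chunks`` expands shorthand chunk specifications
into the canonical tuple-of-tuples form (an internal helper, shown here only
to illustrate the accepted inputs):

    from dask.array.core import normalize_chunks

    normalize_chunks(2, shape=(5,))          # ((2, 2, 1),)
    normalize_chunks((2, 3), shape=(5, 6))   # ((2, 2, 1), (3, 3))
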
-def from_array(x, chunks, name=None, lock=False, fancy=True):
+def from_array(x, chunks, name=None, lock=False, fancy=True, getitem=None):
""" Create dask array from something that looks like an array
Input must have a ``.shape`` and support numpy-style slicing.
@@ -1848,7 +1869,13 @@ def from_array(x, chunks, name=None, lock=False, fancy=True):
original_name = name
if lock is True:
lock = SerializableLock()
- dsk = getem(original_name, chunks, out_name=name, fancy=fancy, lock=lock)
+
+ if getitem is None:
+ if fancy:
+ getitem = getarray
+ else:
+ getitem = getarray_nofancy
+ dsk = getem(original_name, chunks, getitem, out_name=name, lock=lock)
dsk[original_name] = x
return Array(dsk, name, chunks, dtype=x.dtype)
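
``from_array`` now accepts an explicit ``getitem`` callable, while the
existing ``fancy`` flag keeps working for array-likes that only support
basic slicing. A small usage sketch:

    import numpy as np
    import dask.array as da

    x = np.arange(12).reshape(4, 3)
    d = da.from_array(x, chunks=(2, 3), fancy=False)
    assert (d.compute() == x).all()
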
@@ -2149,7 +2176,7 @@ def atop(func, out_ind, *args, **kwargs):
argindsstr = list(concat([(a.name, ind) for a, ind in arginds]))
# Finish up the name
if not out:
- out = '%s-%s' % (token or funcname(func),
+ out = '%s-%s' % (token or funcname(func).strip('_'),
tokenize(func, out_ind, argindsstr, dtype, **kwargs))
dsk = top(func, out, out_ind, *argindsstr, numblocks=numblocks, **kwargs)
@@ -2239,6 +2266,9 @@ def stack(seq, axis=0):
uc_args = list(concat((x, ind) for x in seq))
_, seq = unify_chunks(*uc_args)
+ dt = reduce(np.promote_types, [a.dtype for a in seq])
+ seq = [x.astype(dt) for x in seq]
+
assert len(set(a.chunks for a in seq)) == 1 # same chunks
chunks = (seq[0].chunks[:axis] + ((1,) * n,) + seq[0].chunks[axis:])
@@ -2255,18 +2285,27 @@ def stack(seq, axis=0):
dsk = dict(zip(keys, values))
dsk2 = sharedict.merge((name, dsk), *[a.dask for a in seq])
- dt = reduce(np.promote_types, [a.dtype for a in seq])
-
return Array(dsk2, name, chunks, dtype=dt)
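
``stack`` now promotes mixed dtypes up front and casts every input, so the
blocks agree with the advertised result dtype:

    import dask.array as da

    a = da.ones(4, chunks=2, dtype='i4')
    b = da.ones(4, chunks=2, dtype='f8')
    s = da.stack([a, b])
    assert s.dtype == 'f8' and s.shape == (2, 4)
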
-def concatenate(seq, axis=0):
+def concatenate(seq, axis=0, allow_unknown_chunksizes=False):
"""
Concatenate arrays along an existing axis
Given a sequence of dask Arrays form a new dask Array by stacking them
along an existing dimension (axis=0 by default)
+ Parameters
+ ----------
+ seq: list of dask.arrays
+ axis: int
+ Dimension along which to align all of the arrays
+ allow_unknown_chunksizes: bool
+ Allow unknown chunksizes, such as those that come from converting from dask
+ dataframes. Dask.array is unable to verify that chunks line up. If
+ data comes from differently aligned sources then this can cause
+ unexpected results.
+
Examples
--------
@@ -2299,8 +2338,14 @@ def concatenate(seq, axis=0):
msg = ("Axis must be less than than number of dimensions"
"\nData has %d dimensions, but got axis=%d")
raise ValueError(msg % (ndim, axis))
- if not all(i == axis or all(x.shape[i] == seq[0].shape[i] for x in seq)
- for i in range(ndim)):
+ if (not allow_unknown_chunksizes and
+ not all(i == axis or all(x.shape[i] == seq[0].shape[i] for x in seq)
+ for i in range(ndim))):
+ if any(map(np.isnan, seq[0].shape)):
+ raise ValueError("Tried to concatenate arrays with unknown"
+ " shape %s. To force concatenation pass"
+ " allow_unknown_chunksizes=True."
+ % str(seq[0].shape))
raise ValueError("Shapes do not align: %s", [x.shape for x in seq])
inds = [list(range(ndim)) for i in range(n)]
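
Arrays converted from dask dataframes have unknown (NaN) chunk sizes along
axis 0, which previously made them impossible to concatenate. A sketch of the
new escape hatch, assuming ``DataFrame.values`` is available in this release:

    import pandas as pd
    import dask.dataframe as dd
    import dask.array as da

    df = dd.from_pandas(pd.DataFrame({'x': range(10)}), npartitions=3)
    a = df.values     # chunk sizes along axis 0 are unknown (NaN)
    b = da.concatenate([a, a], axis=0, allow_unknown_chunksizes=True)
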
@@ -2425,15 +2470,25 @@ def transpose(a, axes=None):
else:
axes = tuple(range(a.ndim))[::-1]
axes = tuple(d + a.ndim if d < 0 else d for d in axes)
- return atop(partial(np.transpose, axes=axes),
- axes,
- a, tuple(range(a.ndim)), dtype=a.dtype)
+ return atop(np.transpose, axes, a, tuple(range(a.ndim)),
+ dtype=a.dtype, axes=axes)
alphabet = 'abcdefghijklmnopqrstuvwxyz'
ALPHABET = alphabet.upper()
+def _tensordot(a, b, axes):
+ x = max([a, b], key=lambda x: x.__array_priority__)
+ module = package_of(type(x)) or np
+ x = module.tensordot(a, b, axes=axes)
+ ind = [slice(None, None)] * x.ndim
+ for a in sorted(axes[0]):
+ ind.insert(a, None)
+ x = x[tuple(ind)]
+ return x
+
+
@wraps(np.tensordot)
def tensordot(lhs, rhs, axes=2):
if isinstance(axes, Iterable):
@@ -2451,10 +2506,6 @@ def tensordot(lhs, rhs, axes=2):
if isinstance(right_axes, list):
right_axes = tuple(right_axes)
- if len(left_axes) > 1:
- raise NotImplementedError("Simultaneous Contractions of multiple "
- "indices not yet supported")
-
dt = np.promote_types(lhs.dtype, rhs.dtype)
left_index = list(alphabet[:lhs.ndim])
@@ -2465,16 +2516,13 @@ def tensordot(lhs, rhs, axes=2):
out_index.remove(right_index[r])
right_index[r] = left_index[l]
- intermediate = atop(np.tensordot, out_index,
+ intermediate = atop(_tensordot, out_index,
lhs, left_index,
rhs, right_index, dtype=dt,
axes=(left_axes, right_axes))
- int_index = list(out_index)
- for l in left_axes:
- out_index.remove(left_index[l])
-
- return atop(sum, out_index, intermediate, int_index, dtype=dt)
+ result = intermediate.sum(axis=left_axes)
+ return result
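
With the per-block ``_tensordot`` keeping contracted dimensions as size-1
axes, multiple simultaneous contractions now reduce cleanly via ``sum``. A
minimal check:

    import dask.array as da

    a = da.ones((4, 5, 6), chunks=2)
    b = da.ones((5, 6, 7), chunks=2)
    c = da.tensordot(a, b, axes=((1, 2), (0, 1)))
    assert c.shape == (4, 7)
    assert (c.compute() == 30).all()   # 5 * 6 ones contracted per element
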
@wraps(np.dot)
@@ -2486,7 +2534,7 @@ def insert_to_ooc(out, arr, lock=True, region=None):
if lock is True:
lock = Lock()
- def store(x, index, lock, region):
+ def store(out, x, index, lock, region):
if lock:
lock.acquire()
try:
@@ -2503,8 +2551,9 @@ def insert_to_ooc(out, arr, lock=True, region=None):
slices = slices_from_chunks(arr.chunks)
name = 'store-%s' % arr.name
- dsk = dict(((name,) + t[1:], (store, t, slc, lock, region))
+ dsk = dict(((name,) + t[1:], (store, out, t, slc, lock, region))
for t, slc in zip(core.flatten(arr._keys()), slices))
+
return dsk
@@ -2618,6 +2667,8 @@ def elemwise(op, *args, **kwargs):
msg = "%s does not take the following keyword arguments %s"
raise TypeError(msg % (op.__name__, str(sorted(set(kwargs) - set(['name', 'dtype'])))))
+ args = [np.asarray(a) if isinstance(a, (list, tuple)) else a for a in args]
+
shapes = [getattr(arg, 'shape', ()) for arg in args]
shapes = [s if isinstance(s, Iterable) else () for s in shapes]
out_ndim = len(broadcast_shapes(*shapes)) # Raises ValueError if dimensions mismatch
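
Coercing plain lists and tuples with ``np.asarray`` means mixed expressions
like the following now work in elementwise operations:

    import dask.array as da

    x = da.ones(3, chunks=3)
    assert ((x + [1, 2, 3]).compute() == [2, 3, 4]).all()
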
@@ -2831,6 +2882,54 @@ def ravel(array):
return array.reshape((-1,))
+@wraps(np.roll)
+def roll(array, shift, axis=None):
+ result = array
+
+ if axis is None:
+ result = ravel(result)
+
+ if not isinstance(shift, Integral):
+ raise TypeError(
+ "Expect `shift` to be an instance of Integral"
+ " when `axis` is None."
+ )
+
+ shift = (shift,)
+ axis = (0,)
+ else:
+ try:
+ len(shift)
+ except TypeError:
+ shift = (shift,)
+ try:
+ len(axis)
+ except TypeError:
+ axis = (axis,)
+
+ if len(shift) != len(axis):
+ raise ValueError("Must have the same number of shifts as axes.")
+
+ for i, s in zip(axis, shift):
+ s = -s
+ s %= result.shape[i]
+
+ sl1 = result.ndim * [slice(None)]
+ sl2 = result.ndim * [slice(None)]
+
+ sl1[i] = slice(s, None)
+ sl2[i] = slice(None, s)
+
+ sl1 = tuple(sl1)
+ sl2 = tuple(sl2)
+
+ result = concatenate([result[sl1], result[sl2]], axis=i)
+
+ result = result.reshape(array.shape)
+
+ return result
+
+
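
The new ``roll`` normalizes the shift, slices the array in two along each
affected axis, and re-concatenates, matching ``np.roll``:

    import numpy as np
    import dask.array as da

    x = da.arange(10, chunks=3)
    assert (da.roll(x, 2).compute() == np.roll(np.arange(10), 2)).all()
    assert (da.roll(x, -3).compute() == np.roll(np.arange(10), -3)).all()
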
def offset_func(func, offset, *args):
""" Offsets inputs by offset
@@ -3329,11 +3428,19 @@ def concatenate3(arrays):
[1, 2, 1, 2]])
"""
arrays = concrete(arrays)
+ if not arrays:
+ return np.empty(0)
+
+ advanced = max(core.flatten(arrays, container=(list, tuple)),
+ key=lambda x: getattr(x, '__array_priority__', 0))
+ module = package_of(type(advanced)) or np
+ if module is not np and hasattr(module, 'concatenate'):
+ x = unpack_singleton(arrays)
+ return _concatenate2(arrays, axes=list(range(x.ndim)))
+
ndim = ndimlist(arrays)
if not ndim:
return arrays
- if not arrays:
- return np.empty(0)
chunks = chunks_from_arrays(arrays)
shape = tuple(map(sum, chunks))
@@ -3749,7 +3856,7 @@ def swapaxes(a, axis1, axis2):
dtype=a.dtype)
-@wraps(np.dot)
+@wraps(np.repeat)
def repeat(a, repeats, axis=None):
if axis is None:
if a.ndim == 1:
@@ -3790,6 +3897,21 @@ def repeat(a, repeats, axis=None):
return concatenate(out, axis=axis)
+@wraps(np.tile)
+def tile(A, reps):
+ if not isinstance(reps, Integral):
+ raise NotImplementedError("Only integer valued `reps` supported.")
+
+ if reps < 0:
+ raise ValueError("Negative `reps` are not allowed.")
+ elif reps == 0:
+ return A[..., :0]
+ elif reps == 1:
+ return A
+
+ return concatenate(reps * [A], axis=-1)
+
+
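
``tile`` currently supports only a scalar integer ``reps``, repeating along
the last axis:

    import dask.array as da

    x = da.arange(4, chunks=2)
    assert (da.tile(x, 3).compute() ==
            [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]).all()
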
def slice_with_dask_array(x, index):
y = elemwise(getitem, x, index, dtype=x.dtype)
diff --git a/dask/array/creation.py b/dask/array/creation.py
index 1041e77..095dcef 100644
--- a/dask/array/creation.py
+++ b/dask/array/creation.py
@@ -1,10 +1,12 @@
from __future__ import absolute_import, division, print_function
from functools import partial
+import itertools
import numpy as np
-from .core import Array, normalize_chunks
+from .core import Array, normalize_chunks, stack
+from .wrap import empty
from . import chunk
from ..base import tokenize
@@ -135,3 +137,62 @@ def arange(*args, **kwargs):
elem_count += bs
return Array(dsk, name, chunks, dtype=dtype)
+
+
+def indices(dimensions, dtype=int, chunks=None):
+ """
+ Implements NumPy's ``indices`` for Dask Arrays.
+
+ Generates a grid of indices covering the dimensions provided.
+
+ The final array has the shape ``(len(dimensions), *dimensions)``. The
+ chunks are used to specify the chunking for axis 1 up to
+ ``len(dimensions)``. The 0th axis always has chunks of length 1.
+
+ Parameters
+ ----------
+ dimensions : sequence of ints
+ The shape of the index grid.
+ dtype : dtype, optional
+ Type to use for the array. Default is ``int``.
+ chunks : sequence of ints
+ The size of each block along the corresponding dimension. Note that the
+ last block along a dimension will be smaller if that dimension is not
+ divisible by its chunk size.
+
+ Returns
+ -------
+ grid : dask array
+ """
+ if chunks is None:
+ raise ValueError("Must supply a chunks= keyword argument")
+
+ dimensions = tuple(dimensions)
+ dtype = np.dtype(dtype)
+ chunks = tuple(chunks)
+
+ if len(dimensions) != len(chunks):
+ raise ValueError("Need one more chunk than dimensions.")
+
+ grid = []
+ if np.prod(dimensions):
+ for i in range(len(dimensions)):
+ s = len(dimensions) * [None]
+ s[i] = slice(None)
+ s = tuple(s)
+
+ r = arange(dimensions[i], dtype=dtype, chunks=chunks[i])
+ r = r[s]
+
+ for j in itertools.chain(range(i), range(i + 1, len(dimensions))):
+ r = r.repeat(dimensions[j], axis=j)
+
+ grid.append(r)
+
+ if grid:
+ grid = stack(grid)
+ else:
+ grid = empty(
+ (len(dimensions),) + dimensions, dtype=dtype, chunks=(1,) + chunks
+ )
+
+ return grid
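
``indices`` broadcasts one ``arange`` per dimension and stacks them, so the
0th axis always has unit chunks. It agrees with NumPy:

    import numpy as np
    import dask.array as da

    grid = da.indices((3, 4), chunks=(2, 2))
    assert grid.shape == (2, 3, 4)
    assert (grid.compute() == np.indices((3, 4))).all()
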
diff --git a/dask/array/fft.py b/dask/array/fft.py
index f0d63f8..cfe478f 100644
--- a/dask/array/fft.py
+++ b/dask/array/fft.py
@@ -27,42 +27,66 @@ fft_preamble = """
"""
-def _fft_out_chunks(a, n, axis):
- """ For computing the output chunks of fft and ifft"""
- if n is None:
+def _fft_out_chunks(a, s, axes):
+ """ For computing the output chunks of [i]fft*"""
+ if s is None:
return a.chunks
chunks = list(a.chunks)
- chunks[axis] = (n,)
+ for i, axis in enumerate(axes):
+ chunks[axis] = (s[i],)
return chunks
-def _rfft_out_chunks(a, n, axis):
- if n is None:
- n = a.chunks[axis][0]
+def _rfft_out_chunks(a, s, axes):
+ """ For computing the output chunks of rfft*"""
+ if s is None:
+ s = [a.chunks[axis][0] for axis in axes]
+ s = list(s)
+ s[-1] = s[-1] // 2 + 1
chunks = list(a.chunks)
- chunks[axis] = (n // 2 + 1,)
+ for i, axis in enumerate(axes):
+ chunks[axis] = (s[i],)
return chunks
-def _irfft_out_chunks(a, n, axis):
- if n is None:
- n = 2 * (a.chunks[axis][0] - 1)
+def _irfft_out_chunks(a, s, axes):
+ """ For computing the output chunks of irfft*"""
+ if s is None:
+ s = [a.chunks[axis][0] for axis in axes]
+ s[-1] = 2 * (s[-1] - 1)
chunks = list(a.chunks)
- chunks[axis] = (n,)
+ for i, axis in enumerate(axes):
+ chunks[axis] = (s[i],)
return chunks
-def _hfft_out_chunks(a, n, axis):
- if n is None:
- n = 2 * (a.chunks[axis][0] - 1)
+def _hfft_out_chunks(a, s, axes):
+ assert len(axes) == 1
+
+ axis = axes[0]
+
+ if s is None:
+ s = [2 * (a.chunks[axis][0] - 1)]
+
+ n = s[0]
+
chunks = list(a.chunks)
chunks[axis] = (n,)
return chunks
-def _ihfft_out_chunks(a, n, axis):
- if n is None:
- n = a.chunks[axis][0]
+def _ihfft_out_chunks(a, s, axes):
+ assert len(axes) == 1
+
+ axis = axes[0]
+
+ if s is None:
+ s = [a.chunks[axis][0]]
+ else:
+ assert len(s) == 1
+
+ n = s[0]
+
chunks = list(a.chunks)
if n % 2 == 0:
m = (n // 2) + 1
@@ -110,24 +134,58 @@ def fft_wrap(fft_func, kind=None, dtype=None):
if kind is None:
kind = fft_func.__name__
try:
- out_chunk_fn = _out_chunk_fns[kind]
+ out_chunk_fn = _out_chunk_fns[kind.rstrip("2n")]
except KeyError:
raise ValueError("Given unknown `kind` %s." % kind)
- _dtype = dtype
+ def func(a, s=None, axes=None):
... 7645 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/dask.git