[Python-modules-commits] [dask] 01/07: Import dask_0.14.1.orig.tar.gz

Diane Trout diane at moszumanska.debian.org
Tue Apr 25 03:40:12 UTC 2017


This is an automated email from the git hooks/post-receive script.

diane pushed a commit to branch master
in repository dask.

commit 8d49b60585959da509198ee043172da6c923268d
Author: Diane Trout <diane at ghic.org>
Date:   Mon Apr 24 10:37:48 2017 -0700

    Import dask_0.14.1.orig.tar.gz
---
 .github/ISSUE_TEMPLATE.md                          |   22 +
 .travis.yml                                        |   51 +-
 README.rst                                         |    4 +-
 appveyor.yml                                       |   12 +-
 continuous_integration/{ => appveyor}/build.cmd    |    0
 .../{ => appveyor}/run_tests.cmd                   |    0
 .../{ => appveyor}/run_with_env.cmd                |    0
 .../{ => appveyor}/setup_conda_environment.cmd     |    6 +-
 continuous_integration/travis/after_success.sh     |    5 +
 continuous_integration/travis/install.sh           |   66 +
 continuous_integration/travis/run_tests.sh         |   12 +
 continuous_integration/travis/test_imports.sh      |   34 +
 dask/__init__.py                                   |   10 +-
 dask/array/__init__.py                             |    5 +-
 dask/array/chunk.py                                |    7 +-
 dask/array/core.py                                 |  703 +++++----
 dask/array/creation.py                             |   12 +-
 dask/array/fft.py                                  |   99 +-
 dask/array/ghost.py                                |   30 +-
 dask/array/learn.py                                |   10 +-
 dask/array/linalg.py                               |  113 +-
 dask/array/optimization.py                         |   65 +-
 dask/array/percentile.py                           |    6 +-
 dask/array/random.py                               |   13 +-
 dask/array/rechunk.py                              |  513 ++++++-
 dask/array/reductions.py                           |   90 +-
 dask/array/reshape.py                              |  186 +++
 dask/array/slicing.py                              |    6 +
 dask/array/tests/test_array_core.py                |  568 ++++++--
 dask/array/tests/test_creation.py                  |   28 +-
 dask/array/tests/test_fft.py                       |  131 +-
 dask/array/tests/test_ghost.py                     |    6 +-
 dask/array/tests/test_learn.py                     |    4 +
 dask/array/tests/test_linalg.py                    |   23 +
 dask/array/tests/test_optimization.py              |   63 +-
 dask/array/tests/test_random.py                    |    2 +-
 dask/array/tests/test_rechunk.py                   |  251 +++-
 dask/array/tests/test_reductions.py                |   10 +
 dask/array/tests/test_reshape.py                   |   45 +
 dask/array/tests/test_slicing.py                   |    4 +-
 dask/array/tests/test_ufunc.py                     |   13 +
 dask/array/tests/test_wrap.py                      |    2 +-
 dask/array/ufunc.py                                |   60 +-
 dask/array/utils.py                                |   23 +-
 dask/array/wrap.py                                 |    6 +-
 dask/async.py                                      |   42 +-
 dask/bag/__init__.py                               |    3 +-
 dask/bag/core.py                                   |  185 +--
 dask/bag/tests/test_bag.py                         |  139 +-
 dask/bag/tests/test_text.py                        |   10 +
 dask/bag/text.py                                   |   25 +-
 dask/base.py                                       |  289 +++-
 dask/bytes/compression.py                          |   56 +-
 dask/bytes/core.py                                 |  548 ++++---
 dask/bytes/local.py                                |  172 +--
 dask/bytes/s3.py                                   |  221 +--
 dask/bytes/tests/test_local.py                     |  100 +-
 dask/bytes/tests/test_s3.py                        |  162 ++-
 dask/bytes/utils.py                                |   17 +-
 dask/compatibility.py                              |   79 +-
 dask/context.py                                    |    2 +-
 dask/core.py                                       |   23 +-
 dask/dataframe/__init__.py                         |   10 +-
 dask/dataframe/accessor.py                         |  178 +--
 dask/dataframe/categorical.py                      |  247 +++-
 dask/dataframe/core.py                             | 1510 +++++++++++++-------
 dask/dataframe/groupby.py                          |  563 +++++---
 dask/dataframe/hashing.py                          |  132 ++
 dask/dataframe/hyperloglog.py                      |   84 ++
 dask/dataframe/indexing.py                         |   80 +-
 dask/dataframe/io/__init__.py                      |    8 +-
 dask/dataframe/io/csv.py                           |  179 ++-
 dask/dataframe/io/demo.py                          |  117 +-
 dask/dataframe/io/hdf.py                           |  144 +-
 dask/dataframe/io/io.py                            |  203 +--
 dask/dataframe/io/parquet.py                       |  319 +++++
 dask/dataframe/io/tests/test_csv.py                |  120 +-
 dask/dataframe/io/tests/test_demo.py               |   28 +-
 dask/dataframe/io/tests/test_hdf.py                |   69 +-
 dask/dataframe/io/tests/test_io.py                 |  170 +--
 dask/dataframe/io/tests/test_parquet.py            |  353 +++++
 dask/dataframe/methods.py                          |  163 ++-
 dask/dataframe/multi.py                            |  221 ++-
 dask/dataframe/optimize.py                         |   36 +-
 dask/dataframe/reshape.py                          |   29 +-
 dask/dataframe/rolling.py                          |  357 ++---
 dask/dataframe/shuffle.py                          |  179 ++-
 dask/dataframe/tests/test_arithmetics_reduction.py |   58 +-
 dask/dataframe/tests/test_categorical.py           |  268 +++-
 dask/dataframe/tests/test_dataframe.py             | 1011 +++++++------
 dask/dataframe/tests/test_format.py                |  453 ++++++
 dask/dataframe/tests/test_groupby.py               |  433 +++++-
 dask/dataframe/tests/test_hashing.py               |   48 +
 dask/dataframe/tests/test_hyperloglog.py           |   72 +
 dask/dataframe/tests/test_indexing.py              |   23 +
 dask/dataframe/tests/test_multi.py                 |  365 ++++-
 dask/dataframe/tests/test_optimize_dataframe.py    |   31 +-
 dask/dataframe/tests/test_reshape.py               |   65 +-
 dask/dataframe/tests/test_rolling.py               |  175 +--
 dask/dataframe/tests/test_shuffle.py               |  109 +-
 dask/dataframe/tests/test_utils_dataframe.py       |   45 +-
 dask/dataframe/tseries/resample.py                 |   17 +-
 dask/dataframe/utils.py                            |  137 +-
 dask/delayed.py                                    |  191 +--
 dask/diagnostics/profile_visualize.py              |   39 +-
 dask/diagnostics/tests/test_profiler.py            |   19 +-
 dask/diagnostics/tests/test_progress.py            |   28 +-
 dask/dot.py                                        |    5 +-
 dask/multiprocessing.py                            |   16 +-
 dask/optimize.py                                   |  706 +++++----
 dask/sharedict.py                                  |   99 ++
 dask/sizeof.py                                     |   79 +
 dask/tests/test_async.py                           |    2 +-
 dask/tests/test_base.py                            |  115 +-
 dask/tests/test_callbacks.py                       |   42 +
 dask/tests/test_core.py                            |    8 +-
 dask/tests/test_delayed.py                         |  111 +-
 dask/tests/test_distributed.py                     |   56 +
 dask/tests/test_dot.py                             |   11 +
 dask/tests/test_multiprocessing.py                 |   21 +-
 dask/tests/test_optimize.py                        | 1107 +++++++++++---
 dask/tests/test_rewrite.py                         |    1 +
 dask/tests/test_sharedict.py                       |   82 ++
 dask/tests/test_sizeof.py                          |   55 +
 dask/tests/test_utils.py                           |  142 +-
 dask/threaded.py                                   |    9 +-
 dask/utils.py                                      |  304 +++-
 dask/utils_test.py                                 |   10 +
 docs/release-procedure.md                          |    9 +-
 docs/requirements-docs.txt                         |    2 +-
 docs/source/array-creation.rst                     |   61 +
 docs/source/array-overview.rst                     |   16 +-
 docs/source/array-stack.rst                        |    4 +-
 docs/source/bytes.rst                              |    3 +
 docs/source/changelog.rst                          |  157 +-
 docs/source/dataframe-api.rst                      |   59 +-
 docs/source/dataframe-create.rst                   |  181 +--
 docs/source/dataframe-design.rst                   |   85 +-
 docs/source/dataframe-performance.rst              |  202 +++
 docs/source/dataframe.rst                          |    1 +
 docs/source/delayed-api.rst                        |    2 -
 docs/source/delayed-overview.rst                   |    2 +-
 docs/source/delayed.rst                            |   19 +-
 docs/source/examples-tutorials.rst                 |    4 +
 docs/source/faq.rst                                |    5 +
 docs/source/funding.rst                            |   20 +
 docs/source/images/inc-add.svg                     |   68 +
 docs/source/index.rst                              |    5 +
 docs/source/optimize.rst                           |   31 +-
 docs/source/presentations.rst                      |   32 +
 docs/source/remote-data-services.rst               |  237 +++
 docs/source/scheduler-overview.rst                 |    2 +
 setup.py                                           |   18 +-
 153 files changed, 13709 insertions(+), 4975 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 0000000..4270449
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,22 @@
+Thank you for reporting an issue.
+
+Please read the following guidelines to ensure that your issue is handled
+properly.
+
+1.  If you have a general usage question like
+    *"How do I filter a dask.dataframe?"* then please consider asking the
+    question on StackOverflow instead using the #Dask tag
+2.  If you have found a bug, then please include a self-contained copy-pastable
+    example that generates the issue if possible.
+3.  Please also include an exception and full traceback of the error if
+    available.
+4.  If you have an issue with fastparquet, the distributed system, s3fs, etc.
+    then please consider submitting a report to their issue trackers instead.
+    A full list of repositories can be found here:
+    [https://github.com/dask](https://github.com/dask)
+5.  Please include the versions of Python, dask, and other relevant libraries
+    if they are applicable, (pandas, dask.distributed, etc..)
+
+Thank you again for your efforts.
+
+[How to create a Minimal, Complete, and Verifiable example](http://stackoverflow.com/help/mcve)
diff --git a/.travis.yml b/.travis.yml
index 502a764..4e24150 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,11 +3,11 @@ sudo: false
 
 env:
   matrix:
-    - PYTHON=2.7 NUMPY=1.10.4 PANDAS=0.19.0 COVERAGE='true' PARALLEL='false' XTRATESTARGS=
-    - PYTHON=2.7 NUMPY=1.11.0 PANDAS=0.18.1 COVERAGE='false' PARALLEL='true' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
-    - PYTHON=3.3 NUMPY=1.9.2 PANDAS=0.18.1 COVERAGE='false' PARALLEL='true' XTRATESTARGS=
-    - PYTHON=3.4 NUMPY=1.10.4 PANDAS=0.18.0 COVERAGE='false' PARALLEL='true' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
-    - PYTHON=3.5 NUMPY=1.11.0 PANDAS=0.19.0 COVERAGE='false' PARALLEL='true' XTRATESTARGS=
+    - PYTHON=2.7 NUMPY=1.10.4 PANDAS=0.19.0 COVERAGE='true' PARALLEL='false' TEST_IMPORTS='false' XTRATESTARGS=
+    - PYTHON=2.7 NUMPY=1.11.0 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
+    - PYTHON=3.4 NUMPY=1.10.4 PANDAS=0.19.1 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics
+    - PYTHON=3.5 NUMPY=1.11.0 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='false' XTRATESTARGS=
+    - PYTHON=3.6 NUMPY=1.11.2 PANDAS=0.19.2 COVERAGE='false' PARALLEL='true' TEST_IMPORTS='true' XTRATESTARGS=
 
 addons:
     apt:
@@ -15,43 +15,12 @@ addons:
         - graphviz
         - liblzma-dev
 
-install:
-  # Install conda
-  - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
-  - bash miniconda.sh -b -p $HOME/miniconda
-  - export PATH="$HOME/miniconda/bin:$PATH"
-  - conda config --set always_yes yes --set changeps1 no
-  - conda update conda
-
-  # Install dependencies
-  - conda create -n test-environment python=$PYTHON
-  - source activate test-environment
-  - conda install -c conda-forge numpy=$NUMPY scipy pytables h5py bcolz pytest coverage toolz scikit-learn cytoolz chest blosc cython psutil ipython
-  - if [[ $PYTHON != '3.3' ]]; then conda install -c conda-forge pandas=$PANDAS distributed cloudpickle bokeh python-lmdb; fi
-  - if [[ $PYTHON != '3.3' ]]; then pip install git+https://github.com/dask/zict --upgrade --no-deps; fi
-  - if [[ $PYTHON != '3.3' ]]; then pip install git+https://github.com/dask/distributed --upgrade --no-deps; fi
-  - if [[ $PYTHON == '3.3' ]]; then pip install cloudpickle pandas==$PANDAS; fi
-  - if [[ $PYTHON == '2.7' ]]; then pip install backports.lzma mock; fi
-  - pip install git+https://github.com/mrocklin/partd --upgrade
-  - pip install git+https://github.com/mrocklin/cachey --upgrade
-  - pip install blosc --upgrade
-  - pip install graphviz moto flake8
-  - if [[ $PYTHON < '3' ]]; then pip install git+https://github.com/Blosc/castra; fi
-  # For parallel testing (`-n` argument in XTRATESTARGS)
-  - pip install pytest-xdist
-
-  # Install dask
-  - pip install --no-deps -e .[complete]
-
+install: source continuous_integration/travis/install.sh
 script:
-    # Need to make test order deterministic when parallelizing tests, hence PYTHONHASHSEED
-    # (see https://github.com/pytest-dev/pytest-xdist/issues/63)
-    - if [[ $PARALLEL == 'true' ]]; then export XTRATESTARGS="-n3 $XTRATESTARGS"; export PYTHONHASHSEED=42; fi
-    - if [[ $COVERAGE == 'true' ]]; then coverage run $(which py.test) dask --runslow --doctest-modules --verbose $XTRATESTARGS; else py.test dask --runslow --verbose $XTRATESTARGS; fi
-    - flake8 dask
-
-after_success:
-    - if [[ $coverage == 'true' ]]; then coverage report --show-missing; pip install coveralls ; coveralls ; fi
+  - source continuous_integration/travis/run_tests.sh
+  - flake8 dask
+  - if [[ $TEST_IMPORTS == 'true' ]]; then source continuous_integration/travis/test_imports.sh; fi
+after_success: source continuous_integration/travis/after_success.sh
 
 notifications:
   email: false
diff --git a/README.rst b/README.rst
index 4dede02..919339b 100644
--- a/README.rst
+++ b/README.rst
@@ -13,13 +13,13 @@ LICENSE
 New BSD. See `License File <https://github.com/dask/dask/blob/master/LICENSE.txt>`__.
 
 .. _documentation: http://dask.pydata.org/en/latest/
-.. |Build Status| image:: https://travis-ci.org/dask/dask.svg
+.. |Build Status| image:: https://travis-ci.org/dask/dask.svg?branch=master
    :target: https://travis-ci.org/dask/dask
 .. |Coverage| image:: https://coveralls.io/repos/dask/dask/badge.svg
    :target: https://coveralls.io/r/dask/dask
    :alt: Coverage status
 .. |Doc Status| image:: http://readthedocs.org/projects/dask/badge/?version=latest
-   :target: https://readthedocs.io/projects/dask/?badge=latest
+   :target: http://dask.pydata.org/en/latest/
    :alt: Documentation Status
 .. |Gitter| image:: https://badges.gitter.im/Join%20Chat.svg
    :alt: Join the chat at https://gitter.im/dask/dask
diff --git a/appveyor.yml b/appveyor.yml
index 22660b4..b0837c3 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -6,16 +6,14 @@ environment:
     # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
     # /E:ON and /V:ON options are not enabled in the batch script intepreter
     # See: http://stackoverflow.com/a/13751649/163740
-    CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\continuous_integration\\run_with_env.cmd"
+    CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\continuous_integration\\appveyor\\run_with_env.cmd"
 
   matrix:
     # Since appveyor is quite slow, we only use a single configuration
     - PYTHON: "3.5"
       ARCH: "64"
       NUMPY: "1.11"
-      # Note Pandas 0.19 crashes in test_split_apply_combine_on_series,
-      # see https://ci.appveyor.com/project/pitrou/dask/build/1.0.40#L316
-      PANDAS: "0.18"
+      PANDAS: "0.19.2"
       CONDA_ENV: testenv
 
 init:
@@ -28,12 +26,12 @@ install:
   # Update to a known good conda
   # (to workaround http://help.appveyor.com/discussions/problems/4910)
   - conda install -q -y conda=4.2.9
-  - continuous_integration\\setup_conda_environment.cmd
+  - continuous_integration\\appveyor\\setup_conda_environment.cmd
 
 build_script:
-  - continuous_integration\\build.cmd
+  - continuous_integration\\appveyor\\build.cmd
 
 test_script:
   # %CMD_IN_ENV% is needed for distutils/setuptools-based tests
   # on certain build configurations.
-  - "%CMD_IN_ENV% continuous_integration\\run_tests.cmd"
+  - "%CMD_IN_ENV% continuous_integration\\appveyor\\run_tests.cmd"
diff --git a/continuous_integration/build.cmd b/continuous_integration/appveyor/build.cmd
similarity index 100%
rename from continuous_integration/build.cmd
rename to continuous_integration/appveyor/build.cmd
diff --git a/continuous_integration/run_tests.cmd b/continuous_integration/appveyor/run_tests.cmd
similarity index 100%
rename from continuous_integration/run_tests.cmd
rename to continuous_integration/appveyor/run_tests.cmd
diff --git a/continuous_integration/run_with_env.cmd b/continuous_integration/appveyor/run_with_env.cmd
similarity index 100%
rename from continuous_integration/run_with_env.cmd
rename to continuous_integration/appveyor/run_with_env.cmd
diff --git a/continuous_integration/setup_conda_environment.cmd b/continuous_integration/appveyor/setup_conda_environment.cmd
similarity index 83%
rename from continuous_integration/setup_conda_environment.cmd
rename to continuous_integration/appveyor/setup_conda_environment.cmd
index b14e24b..fd4623e 100644
--- a/continuous_integration/setup_conda_environment.cmd
+++ b/continuous_integration/appveyor/setup_conda_environment.cmd
@@ -23,12 +23,12 @@ call activate %CONDA_ENV%
 %CONDA_INSTALL% numpy=%NUMPY% pandas=%PANDAS% cloudpickle distributed
 %CONDA_INSTALL% s3fs psutil pytables bokeh bcolz scipy h5py ipython
 
-%PIP_INSTALL% git+https://github.com/mrocklin/partd --upgrade
-%PIP_INSTALL% git+https://github.com/mrocklin/cachey --upgrade
+%PIP_INSTALL% git+https://github.com/dask/partd --upgrade
+%PIP_INSTALL% git+https://github.com/dask/cachey --upgrade
+%PIP_INSTALL% git+https://github.com/dask/distributed --upgrade
 %PIP_INSTALL% blosc --upgrade
 %PIP_INSTALL% moto
 
-if %PYTHON% LSS 3.0 (%PIP_INSTALL% git+https://github.com/Blosc/castra)
 if %PYTHON% LSS 3.0 (%PIP_INSTALL% backports.lzma mock)
 
 @rem Display final environment (for reproducing)
diff --git a/continuous_integration/travis/after_success.sh b/continuous_integration/travis/after_success.sh
new file mode 100644
index 0000000..2537adc
--- /dev/null
+++ b/continuous_integration/travis/after_success.sh
@@ -0,0 +1,5 @@
+if [[ $COVERAGE == 'true' ]]; then
+    coverage report --show-missing
+    pip install coveralls
+    coveralls
+fi
diff --git a/continuous_integration/travis/install.sh b/continuous_integration/travis/install.sh
new file mode 100644
index 0000000..901e74a
--- /dev/null
+++ b/continuous_integration/travis/install.sh
@@ -0,0 +1,66 @@
+# Install conda
+wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
+bash miniconda.sh -b -p $HOME/miniconda
+export PATH="$HOME/miniconda/bin:$PATH"
+conda config --set always_yes yes --set changeps1 no
+
+# Create conda environment
+conda create -n test-environment python=$PYTHON
+source activate test-environment
+
+# Install dependencies.
+# XXX: Due to a weird conda dependency resolution issue, we need to install
+# dependencies in two separate calls, otherwise we sometimes get version
+# incompatible with the installed version of numpy leading to crashes. This
+# seems to have to do with differences between conda-forge and defaults.
+conda install -c conda-forge \
+    numpy=$NUMPY \
+    pandas=$PANDAS \
+    bcolz \
+    blosc \
+    chest \
+    coverage \
+    cytoolz \
+    h5py \
+    ipython \
+    partd \
+    psutil \
+    pytables \
+    pytest \
+    scikit-learn \
+    scipy \
+    toolz
+
+# Specify numpy/pandas here to prevent upgrade/downgrade
+conda install -c conda-forge \
+    numpy=$NUMPY \
+    pandas=$PANDAS \
+    distributed \
+    cloudpickle \
+    bokeh \
+
+pip install git+https://github.com/dask/zict --upgrade --no-deps
+pip install git+https://github.com/dask/distributed --upgrade --no-deps
+
+if [[ $PYTHONOPTIMIZE != '2' ]]; then
+    conda install -c conda-forge numba cython
+    pip install git+https://github.com/dask/fastparquet
+fi
+
+if [[ $PYTHON == '2.7' ]]; then
+    pip install backports.lzma mock
+fi
+
+pip install \
+    cachey \
+    graphviz \
+    moto \
+    --upgrade --no-deps
+
+pip install \
+    flake8 \
+    pandas_datareader \
+    pytest-xdist
+
+# Install dask
+pip install --no-deps -e .[complete]
diff --git a/continuous_integration/travis/run_tests.sh b/continuous_integration/travis/run_tests.sh
new file mode 100644
index 0000000..54bd62b
--- /dev/null
+++ b/continuous_integration/travis/run_tests.sh
@@ -0,0 +1,12 @@
+# Need to make test order deterministic when parallelizing tests, hence PYTHONHASHSEED
+# (see https://github.com/pytest-dev/pytest-xdist/issues/63)
+if [[ $PARALLEL == 'true' ]]; then
+    export XTRATESTARGS="-n3 $XTRATESTARGS"
+    export PYTHONHASHSEED=42
+fi
+
+if [[ $COVERAGE == 'true' ]]; then
+    coverage run `which py.test` dask --runslow --doctest-modules --verbose $XTRATESTARGS
+else
+    py.test dask --runslow --verbose $XTRATESTARGS
+fi
diff --git a/continuous_integration/travis/test_imports.sh b/continuous_integration/travis/test_imports.sh
new file mode 100644
index 0000000..61d24e0
--- /dev/null
+++ b/continuous_integration/travis/test_imports.sh
@@ -0,0 +1,34 @@
+test_import () {
+    # Install dependencies
+    if [[ -n "$2" ]]; then
+        output=$(conda install -c conda-forge $2)
+        if [[ $? -eq 1 ]]; then
+            echo $output
+            echo "$1 install failed" >&2
+            exit 1
+        fi
+    fi
+    # Check import
+    python -c "$3"
+    if [[ $? -eq 1 ]]; then
+        echo "$1 import failed" >&2
+        exit 1
+    else
+        echo "$1 import succeeded"
+    fi
+    # Uninstall dependencies
+    if [[ -n "$2" ]]; then
+        output=$(conda uninstall $2)
+    fi
+}
+
+# Create an empty environment
+conda create -n test-imports python=$PYTHON
+source activate test-imports
+
+(test_import "Core" "" "import dask, dask.threaded, dask.optimize") && \
+(test_import "Delayed" "toolz" "import dask.delayed") && \
+(test_import "Bag" "toolz partd cloudpickle" "import dask.bag") && \
+(test_import "Array" "toolz numpy" "import dask.array") && \
+(test_import "Dataframe" "numpy pandas toolz partd cloudpickle" "import dask.dataframe") && \
+(test_import "Distributed" "distributed s3fs" "import dask.distributed")
diff --git a/dask/__init__.py b/dask/__init__.py
index 6b9a7dc..30a3045 100644
--- a/dask/__init__.py
+++ b/dask/__init__.py
@@ -4,14 +4,16 @@ from .core import istask
 from .context import set_options
 from .async import get_sync as get
 try:
-    from .delayed import do, delayed, value
+    from .delayed import do, delayed
 except ImportError:
     pass
 try:
-    from .base import visualize, compute
+    from .base import visualize, compute, persist
 except ImportError:
     pass
 
 from ._version import get_versions
-__version__ = get_versions()['version']
-del get_versions
+versions = get_versions()
+__version__ = versions['version']
+__git_revision__ = versions['full-revisionid']
+del get_versions, versions
diff --git a/dask/array/__init__.py b/dask/array/__init__.py
index ffb6545..5ad784b 100644
--- a/dask/array/__init__.py
+++ b/dask/array/__init__.py
@@ -3,12 +3,13 @@ from __future__ import absolute_import, division, print_function
 from ..utils import ignoring
 from .core import (Array, stack, concatenate, take, tensordot, transpose,
         from_array, choose, where, coarsen, insert, broadcast_to, ravel,
-        reshape, fromfunction, unique, store, squeeze, topk, bincount,
+        fromfunction, unique, store, squeeze, topk, bincount,
         digitize, histogram, map_blocks, atop, to_hdf5, dot, cov, array,
         dstack, vstack, hstack, to_npy_stack, from_npy_stack, compress,
-        from_delayed, round, swapaxes, repeat)
+        from_delayed, round, swapaxes, repeat, asarray)
 from .core import (around, isnull, notnull, isclose, eye, triu,
                    tril, diag, corrcoef)
+from .reshape import reshape
 from .ufunc import (logaddexp, logaddexp2, conj, exp, log, log2, log10, log1p,
         expm1, sqrt, square, sin, cos, tan, arcsin, arccos, arctan, arctan2,
         hypot, sinh, cosh, tanh, arcsinh, arccosh, arctanh, deg2rad, rad2deg,
diff --git a/dask/array/chunk.py b/dask/array/chunk.py
index d5b38a3..f5e2d73 100644
--- a/dask/array/chunk.py
+++ b/dask/array/chunk.py
@@ -142,7 +142,7 @@ def coarsen(reduction, x, axes, trim_excess=False):
         x = x[ind]
 
     # (10, 10) -> (5, 2, 5, 2)
-    newshape = tuple(concat([(x.shape[i] / axes[i], axes[i])
+    newshape = tuple(concat([(x.shape[i] // axes[i], axes[i])
                              for i in range(x.ndim)]))
 
     return reduction(x.reshape(newshape), axis=tuple(range(1, x.ndim * 2, 2)))
@@ -186,3 +186,8 @@ def topk(k, x):
     k = np.minimum(k, len(x))
     ind = np.argpartition(x, -k)[-k:]
     return np.sort(x[ind])[::-1]
+
+
+def arange(start, stop, step, length, dtype):
+    res = np.arange(start, stop, step, dtype)
+    return res[:-1] if len(res) > length else res
diff --git a/dask/array/core.py b/dask/array/core.py
index 45b22eb..c70c26f 100644
--- a/dask/array/core.py
+++ b/dask/array/core.py
@@ -1,7 +1,7 @@
 from __future__ import absolute_import, division, print_function
 
 from bisect import bisect
-from collections import Iterable, MutableMapping
+from collections import Iterable, MutableMapping, Mapping
 from collections import Iterator
 from functools import partial, wraps
 import inspect
@@ -10,25 +10,38 @@ from numbers import Number
 import operator
 from operator import add, getitem, mul
 import os
+import sys
+import traceback
 import pickle
 from threading import Lock
 import uuid
 import warnings
 
-from toolz.curried import (pipe, partition, concat, pluck, join, first,
-                           memoize, map, groupby, valmap, accumulate, merge,
-                           reduce, interleave, sliding_window, assoc)
+import toolz
+try:
+    from cytoolz.curried import (partition, concat, pluck, join, first,
+                                 groupby, valmap, accumulate, interleave,
+                                 sliding_window, assoc)
+
+except ImportError:
+    from toolz.curried import (partition, concat, pluck, join, first,
+                               groupby, valmap, accumulate,
+                               interleave, sliding_window, assoc)
+from toolz import pipe, map, reduce
 import numpy as np
 
 from . import chunk
 from .slicing import slice_array
 from . import numpy_compat
 from ..base import Base, tokenize, normalize_token
-from ..utils import (deepmap, ignoring, concrete, is_integer,
-                     IndexCallable, funcname, derived_from)
+from ..utils import (homogeneous_deepmap, ndeepmap, ignoring, concrete,
+                     is_integer, IndexCallable, funcname, derived_from,
+                     SerializableLock, ensure_dict)
 from ..compatibility import unicode, long, getargspec, zip_longest, apply
-from ..optimize import cull
+from ..delayed import to_task_dask
 from .. import threaded, core
+from .. import sharedict
+from ..sharedict import ShareDict
 
 
 def getarray(a, b, lock=None):
@@ -64,7 +77,11 @@ def getarray_nofancy(a, b, lock=None):
     return getarray(a, b, lock=lock)
 
 
-from .optimization import optimize
+def getarray_inline(a, b, lock=None):
+    return getarray(a, b, lock=lock)
+
+
+from .optimization import optimize, fuse_slice
 
 
 def slices_from_chunks(chunks):
@@ -197,7 +214,7 @@ def zero_broadcast_dimensions(lol, nblocks):
     lol_tuples
     """
     f = lambda t: (t[0],) + tuple(0 if d == 1 else i for i, d in zip(t[1:], nblocks))
-    return deepmap(f, lol)
+    return homogeneous_deepmap(f, lol)
 
 
 def broadcast_dimensions(argpairs, numblocks, sentinels=(1, (1,)),
@@ -381,21 +398,38 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
         args = []
         for arg, ind in argpairs:
             tups = lol_tuples((arg,), ind, kd, dummies)
-            tups2 = zero_broadcast_dimensions(tups, numblocks[arg])
+            if any(nb == 1 for nb in numblocks[arg]):
+                tups2 = zero_broadcast_dimensions(tups, numblocks[arg])
+            else:
+                tups2 = tups
             if concatenate and isinstance(tups2, list):
                 axes = [n for n, i in enumerate(ind) if i in dummies]
                 tups2 = (concatenate_axes, tups2, axes)
             args.append(tups2)
-        valtups.append(tuple(args))
+        valtups.append(args)
+
+    if not kwargs:  # will not be used in an apply, should be a tuple
+        valtups = [tuple(vt) for vt in valtups]
 
     # Add heads to tuples
     keys = [(output,) + kt for kt in keytups]
+
+    dsk = {}
+    # Unpack delayed objects in kwargs
     if kwargs:
-        vals = [(apply, func, list(vt), kwargs) for vt in valtups]
+        task, dsk2 = to_task_dask(kwargs)
+        if dsk2:
+            dsk.update(ensure_dict(dsk2))
+            kwargs2 = task
+        else:
+            kwargs2 = kwargs
+        vals = [(apply, func, vt, kwargs2) for vt in valtups]
     else:
         vals = [(func,) + vt for vt in valtups]
 
-    return dict(zip(keys, vals))
+    dsk.update(dict(zip(keys, vals)))
+
+    return dsk
 
 
 def _concatenate2(arrays, axes=[]):
@@ -435,40 +469,67 @@ def _concatenate2(arrays, axes=[]):
     return np.concatenate(arrays, axis=axes[0])
 
 
+def apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype=True):
+    args = [np.ones((1,) * x.ndim, dtype=x.dtype)
+            if isinstance(x, Array) else x for x in args]
+    try:
+        o = func(*args, **kwargs)
+    except Exception as e:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        tb = ''.join(traceback.format_tb(exc_traceback))
+        suggest = ("Please specify the dtype explicitly using the "
+                   "`dtype` kwarg.\n\n") if suggest_dtype else ""
+        msg = ("`dtype` inference failed in `{0}`.\n\n"
+               "{1}"
+               "Original error is below:\n"
+               "------------------------\n"
+               "{2}\n\n"
+               "Traceback:\n"
+               "---------\n"
+               "{3}").format(funcname, suggest, repr(e), tb)
+    else:
+        msg = None
+    if msg is not None:
+        raise ValueError(msg)
+    return o.dtype
+
+
 def map_blocks(func, *args, **kwargs):
-    """ Map a function across all blocks of a dask array
+    """ Map a function across all blocks of a dask array.
 
     Parameters
     ----------
-    func: callable
-        Function to apply to every block in the array
-    args: dask arrays or constants
-    dtype: np.dtype
-        Datatype of resulting array
-    chunks: tuple (optional)
-        Chunk shape of resulting blocks if the function does not preserve shape
-    drop_axis: number or iterable (optional)
-        Dimensions lost by the function
-    new_axis: number or iterable (optional)
-        New dimensions created by the function
-    **kwargs:
-        Other keyword arguments to pass to function.
-        Values must be constants (not dask.arrays)
-
-    You must also specify the chunks and dtype of the resulting array.  If you
-    don't then we assume that the resulting array has the same block structure
-    as the input.
+    func : callable
+        Function to apply to every block in the array.
+    args : dask arrays or constants
+    dtype : np.dtype, optional
+        The ``dtype`` of the output array. It is recommended to provide this.
+        If not provided, will be inferred by applying the function to a small
+        set of fake data.
+    chunks : tuple, optional
+        Chunk shape of resulting blocks if the function does not preserve
+        shape. If not provided, the resulting array is assumed to have the same
+        block structure as the first input array.
+    drop_axis : number or iterable, optional
+        Dimensions lost by the function.
+    new_axis : number or iterable, optional
+        New dimensions created by the function.
+    name : string, optional
+        The key name to use for the array. If not provided, will be determined
+        by a hash of the arguments.
+    **kwargs :
+        Other keyword arguments to pass to function. Values must be constants
+        (not dask.arrays)
 
     Examples
     --------
-
     >>> import dask.array as da
     >>> x = da.arange(6, chunks=3)
 
     >>> x.map_blocks(lambda x: x * 2).compute()
     array([ 0,  2,  4,  6,  8, 10])
 
-    The ``da.map_blocks`` function can also accept multiple arrays
+    The ``da.map_blocks`` function can also accept multiple arrays.
 
     >>> d = da.arange(5, chunks=2)
     >>> e = da.arange(5, chunks=2)
@@ -477,7 +538,7 @@ def map_blocks(func, *args, **kwargs):
     >>> f.compute()
     array([ 0,  2,  6, 12, 20])
 
-    If function changes shape of the blocks then please provide chunks
+    If the function changes shape of the blocks then you must provide chunks
     explicitly.
 
     >>> y = x.map_blocks(lambda x: x[::2], chunks=((2, 2),))
@@ -494,15 +555,14 @@ def map_blocks(func, *args, **kwargs):
     >>> b = a.map_blocks(lambda x: x[None, :, None], chunks=(1, 6, 1),
     ...                  new_axis=[0, 2])
 
-
-    Map_blocks aligns blocks by block positions without regard to shape.  In
-    the following example we have two arrays with the same number of blocks but
+    Map_blocks aligns blocks by block positions without regard to shape. In the
+    following example we have two arrays with the same number of blocks but
     with different shape and chunk sizes.
 
     >>> x = da.arange(1000, chunks=(100,))
     >>> y = da.arange(100, chunks=(10,))
 
-    The relevant attribute to match is numblocks
+    The relevant attribute to match is numblocks.
 
     >>> x.numblocks
     (10,)
@@ -516,7 +576,7 @@ def map_blocks(func, *args, **kwargs):
     ...     return np.array([a.max(), b.max()])
 
     >>> da.map_blocks(func, x, y, chunks=(2,), dtype='i8')
-    dask.array<..., shape=(20,), dtype=int64, chunksize=(2,)>
+    dask.array<func, shape=(20,), dtype=int64, chunksize=(2,)>
 
     >>> _.compute()
     array([ 99,   9, 199,  19, 299,  29, 399,  39, 499,  49, 599,  59, 699,
@@ -554,7 +614,7 @@ def map_blocks(func, *args, **kwargs):
         raise ValueError("Can't specify drop_axis and new_axis together")
 
     arrs = [a for a in args if isinstance(a, Array)]
-    args = [(i, a) for i, a in enumerate(args) if not isinstance(a, Array)]
+    other = [(i, a) for i, a in enumerate(args) if not isinstance(a, Array)]
 
     argpairs = [(a.name, tuple(range(a.ndim))[::-1]) for a in arrs]
     numblocks = {a.name: a.numblocks for a in arrs}
@@ -571,9 +631,9 @@ def map_blocks(func, *args, **kwargs):
     if block_id:
         kwargs['block_id'] = '__dummy__'
 
-    if args:
+    if other:
         dsk = top(partial_by_order, name, out_ind, *arginds,
-                  numblocks=numblocks, function=func, other=args,
+                  numblocks=numblocks, function=func, other=other,
                   **kwargs)
     else:
         dsk = top(func, name, out_ind, *arginds, numblocks=numblocks,
@@ -584,11 +644,18 @@ def map_blocks(func, *args, **kwargs):
         for k in dsk.keys():
             dsk[k] = dsk[k][:-1] + (assoc(dsk[k][-1], 'block_id', k[1:]),)
 
+    if dtype is None:
+        if block_id:
+            kwargs2 = assoc(kwargs, 'block_id', first(dsk.keys())[1:])
+        else:
+            kwargs2 = kwargs
+        dtype = apply_infer_dtype(func, args, kwargs2, 'map_blocks')
+
     if len(arrs) == 1:
         numblocks = list(arrs[0].numblocks)
     else:
         dims = broadcast_dimensions(argpairs, numblocks)
-        numblocks = [b for (_, b) in reversed(list(dims.items()))]
+        numblocks = [b for (_, b) in sorted(dims.items(), reverse=True)]
 
     if drop_axis:
         if any(numblocks[i] > 1 for i in drop_axis):
@@ -640,7 +707,8 @@ def map_blocks(func, *args, **kwargs):
 
     chunks = tuple(chunks2)
 
-    return Array(merge(dsk, *[a.dask for a in arrs]), name, chunks, dtype)
+    return Array(sharedict.merge((name, dsk), *[a.dask for a in arrs]),
+                 name, chunks, dtype)
 
 
 def broadcast_chunks(*chunkss):
@@ -693,15 +761,14 @@ def squeeze(a, axis=None):
         axis = tuple(i for i, d in enumerate(a.shape) if d == 1)
     b = a.map_blocks(partial(np.squeeze, axis=axis), dtype=a.dtype)
     chunks = tuple(bd for bd in b.chunks if bd != (1,))
+
+    name = 'squeeze-' + tokenize(a, axis)
     old_keys = list(product([b.name], *[range(len(bd)) for bd in b.chunks]))
-    new_keys = list(product([b.name], *[range(len(bd)) for bd in chunks]))
+    new_keys = list(product([name], *[range(len(bd)) for bd in chunks]))
 
-    dsk = b.dask.copy()
-    for o, n in zip(old_keys, new_keys):
-        dsk[n] = dsk[o]
-        del dsk[o]
+    dsk = {n: b.dask[o] for o, n in zip(old_keys, new_keys)}
 
-    return Array(dsk, b.name, chunks, dtype=a.dtype)
+    return Array(sharedict.merge(b.dask, (name, dsk)), name, chunks, dtype=a.dtype)
 
 
 def topk(k, x):
@@ -733,10 +800,10 @@ def topk(k, x):
                        slice(-1, -k - 1, -1))
     chunks = ((k,),)
 
-    return Array(merge(dsk, x.dask), name2, chunks, dtype=x.dtype)
+    return Array(sharedict.merge((name2, dsk), x.dask), name2, chunks, dtype=x.dtype)
 
 
-def store(sources, targets, lock=True, compute=True, **kwargs):
+def store(sources, targets, lock=True, regions=None, compute=True, **kwargs):
     """ Store dask arrays in array-like objects, overwrite data in target
 
     This stores dask arrays into object that supports numpy-style setitem
@@ -757,6 +824,10 @@ def store(sources, targets, lock=True, compute=True, **kwargs):
         Whether or not to lock the data stores while storing.
         Pass True (lock each file individually), False (don't lock) or a
         particular ``threading.Lock`` object to be shared among all writes.
+    regions: tuple of slices or iterable of tuple of slices
+        Each ``region`` tuple in ``regions`` should be such that
+        ``target[region].shape = source.shape``
+        for the corresponding source and target in sources and targets, respectively.
     compute: boolean, optional
         If true compute immediately, return ``dask.delayed.Delayed`` otherwise
 
@@ -787,17 +858,27 @@ def store(sources, targets, lock=True, compute=True, **kwargs):
         raise ValueError("Different number of sources [%d] and targets [%d]"
                          % (len(sources), len(targets)))
 
-    updates = [insert_to_ooc(tgt, src, lock=lock)
-               for tgt, src in zip(targets, sources)]
-    dsk = merge([src.dask for src in sources] + updates)
+    if isinstance(regions, tuple) or regions is None:
+        regions = [regions]
+
+    if len(sources) > 1 and len(regions) == 1:
+        regions *= len(sources)
+
+    if len(sources) != len(regions):
+        raise ValueError("Different number of sources [%d] and targets [%d] than regions [%d]"
+                         % (len(sources), len(targets), len(regions)))
+
+    updates = [insert_to_ooc(tgt, src, lock=lock, region=reg)
+               for tgt, src, reg in zip(targets, sources, regions)]
     keys = [key for u in updates for key in u]
+    name = 'store-' + tokenize(*keys)
+    dsk = sharedict.merge((name, toolz.merge(updates)), *[src.dask for src in sources])
     if compute:
         Array._get(dsk, keys, **kwargs)
     else:
         from ..delayed import Delayed
-        name = 'store-' + tokenize(*keys)
-        dsk[name] = keys
-        return Delayed(name, [dsk])
+        dsk.update({name: keys})
+        return Delayed(name, dsk)
 
 
 def blockdims_from_blockshape(shape, chunks):
@@ -805,18 +886,24 @@ def blockdims_from_blockshape(shape, chunks):
 
     >>> blockdims_from_blockshape((10, 10), (4, 3))
     ((4, 4, 2), (3, 3, 3, 1))
+    >>> blockdims_from_blockshape((10, 0), (4, 0))
+    ((4, 4, 2), (0,))
     """
     if chunks is None:
         raise TypeError("Must supply chunks= keyword argument")
     if shape is None:
         raise TypeError("Must supply shape= keyword argument")
+    if np.isnan(sum(shape)) or np.isnan(sum(chunks)):
+        raise ValueError("Array chunk sizes are unknown. shape: %s, chunks: %s"
+                         % (shape, chunks))
     if not all(map(is_integer, chunks)):
         raise ValueError("chunks can only contain integers.")
     if not all(map(is_integer, shape)):
         raise ValueError("shape can only contain integers.")
     shape = tuple(map(int, shape))
     chunks = tuple(map(int, chunks))
-    return tuple((bd,) * (d // bd) + ((d % bd,) if d % bd else ())
+    return tuple(((bd,) * (d // bd) + ((d % bd,) if d % bd else ())
+                 if d else (0,))
                  for d, bd in zip(shape, chunks))
 
 
@@ -856,21 +943,26 @@ class Array(Base):
     --------
     dask.array.from_array
     """
-    __slots__ = 'dask', 'name', '_chunks', '_dtype'
+    __slots__ = 'dask', 'name', '_chunks', 'dtype'
 
     _optimize = staticmethod(optimize)
     _default_get = staticmethod(threaded.get)
     _finalize = staticmethod(finalize)
 
-    def __init__(self, dask, name, chunks, dtype=None, shape=None):
+    def __init__(self, dask, name, chunks, dtype, shape=None):
+        assert isinstance(dask, Mapping)
+        if not isinstance(dask, ShareDict):
+            s = ShareDict()
+            s.update_with_key(dask, key=name)
+            dask = s
         self.dask = dask
         self.name = name
         self._chunks = normalize_chunks(chunks, shape)
         if self._chunks is None:
             raise ValueError(chunks_none_error_message)
-        if dtype is not None:
-            dtype = np.dtype(dtype)
-        self._dtype = dtype
+        if dtype is None:
+            raise ValueError("You must specify the dtype of the array")
+        self.dtype = np.dtype(dtype)
 
     @property
     def _args(self):
@@ -880,7 +972,7 @@ class Array(Base):
         return self._args
... 25596 lines suppressed ...
