[Python-modules-commits] [dask.distributed] 01/03: Import dask.distributed_1.14.3+ds.1.orig.tar.gz

Diane Trout diane at moszumanska.debian.org
Wed Jan 11 06:22:08 UTC 2017


This is an automated email from the git hooks/post-receive script.

diane pushed a commit to branch master
in repository dask.distributed.

commit a3e43ddc7b895651f3efad739586f7b1d326f429
Author: Diane Trout <diane at ghic.org>
Date:   Tue Jan 10 21:37:49 2017 -0800

    Import dask.distributed_1.14.3+ds.1.orig.tar.gz
---
 .coveragerc                                        |   20 +
 .gitignore                                         |    3 +
 .travis.yml                                        |   80 +
 AUTHORS.md                                         |    3 +
 LICENSE.txt                                        |   28 +
 MANIFEST.in                                        |   13 +
 README.rst                                         |    6 +
 conftest.py                                        |    5 +
 continuous_integration/Dockerfile                  |   43 +
 continuous_integration/README.md                   |   37 +
 .../docker-files/cdh5-install.sh                   |   17 +
 continuous_integration/docker-files/cloudera.pref  |    3 +
 .../docker-files/libhdfs-build.sh                  |    8 +
 continuous_integration/docker-files/start.sh       |   10 +
 distributed/__init__.py                            |   21 +
 distributed/_ipython_utils.py                      |  246 ++
 distributed/batched.py                             |  214 ++
 distributed/bokeh/__init__.py                      |   35 +
 distributed/bokeh/application.py                   |  114 +
 distributed/bokeh/background/__init__.py           |    0
 distributed/bokeh/background/main.py               |    0
 distributed/bokeh/background/server_lifecycle.py   |  147 +
 distributed/bokeh/components.py                    |  558 +++
 distributed/bokeh/export_tool.coffee               |   44 +
 distributed/bokeh/export_tool.py                   |   33 +
 distributed/bokeh/memory-usage.py                  |   12 +
 distributed/bokeh/processing-stacks.py             |   12 +
 distributed/bokeh/resource-profiles.py             |   13 +
 distributed/bokeh/status/__init__.py               |    0
 distributed/bokeh/status/main.py                   |   39 +
 distributed/bokeh/status/templates/index.html      |   74 +
 distributed/bokeh/task-progress.py                 |   12 +
 distributed/bokeh/task-stream.py                   |   13 +
 distributed/bokeh/tasks/__init__.py                |    0
 distributed/bokeh/tasks/main.py                    |   19 +
 distributed/bokeh/tests/test_application.py        |   47 +
 distributed/bokeh/tests/test_components.py         |   41 +
 distributed/bokeh/tests/test_worker_monitor.py     |   60 +
 distributed/bokeh/utils.py                         |   11 +
 distributed/bokeh/worker-table.py                  |   13 +
 distributed/bokeh/worker_monitor.py                |  106 +
 distributed/bokeh/workers/__init__.py              |    0
 distributed/bokeh/workers/main.py                  |   29 +
 distributed/bokeh/workers/templates/index.html     |   74 +
 distributed/cli/__init__.py                        |    0
 distributed/cli/dask_remote.py                     |   23 +
 distributed/cli/dask_scheduler.py                  |  105 +
 distributed/cli/dask_ssh.py                        |   78 +
 distributed/cli/dask_submit.py                     |   31 +
 distributed/cli/dask_worker.py                     |  193 +
 distributed/cli/dcluster.py                        |   12 +
 distributed/cli/dscheduler.py                      |   12 +
 distributed/cli/dworker.py                         |   12 +
 distributed/cli/tests/test_dask_remote.py          |    9 +
 distributed/cli/tests/test_dask_scheduler.py       |  172 +
 distributed/cli/tests/test_dask_submit.py          |    9 +
 distributed/cli/tests/test_dworker.py              |   38 +
 distributed/cli/utils.py                           |   35 +
 distributed/client.py                              | 2313 ++++++++++++
 distributed/collections.py                         |  257 ++
 distributed/compatibility.py                       |   70 +
 distributed/config.py                              |   35 +
 distributed/config.yaml                            |   13 +
 distributed/core.py                                |  663 ++++
 distributed/deploy/__init__.py                     |    6 +
 distributed/deploy/adaptive.py                     |   89 +
 distributed/deploy/local.py                        |  266 ++
 distributed/deploy/ssh.py                          |  292 ++
 distributed/deploy/tests/test_adaptive.py          |   53 +
 distributed/deploy/tests/test_local.py             |  213 ++
 distributed/deploy/tests/test_ssh.py               |   29 +
 distributed/deploy/utils_test.py                   |   41 +
 distributed/diagnostics/__init__.py                |    9 +
 distributed/diagnostics/eventstream.py             |   73 +
 distributed/diagnostics/plugin.py                  |   52 +
 distributed/diagnostics/progress.py                |  293 ++
 distributed/diagnostics/progress_stream.py         |  213 ++
 distributed/diagnostics/progressbar.py             |  325 ++
 distributed/diagnostics/scheduler.py               |   76 +
 distributed/diagnostics/tests/test_eventstream.py  |   51 +
 distributed/diagnostics/tests/test_plugin.py       |   33 +
 distributed/diagnostics/tests/test_progress.py     |  203 ++
 .../diagnostics/tests/test_progress_stream.py      |  130 +
 distributed/diagnostics/tests/test_progressbar.py  |   92 +
 .../tests/test_scheduler_diagnostics.py            |   54 +
 distributed/diagnostics/tests/test_widgets.py      |  240 ++
 distributed/hdfs.py                                |  139 +
 distributed/http/__init__.py                       |    4 +
 distributed/http/core.py                           |   85 +
 distributed/http/scheduler.py                      |  112 +
 distributed/http/tests/test_scheduler_http.py      |  192 +
 distributed/http/tests/test_worker_http.py         |   93 +
 distributed/http/worker.py                         |   67 +
 distributed/joblib.py                              |   84 +
 distributed/nanny.py                               |  385 ++
 distributed/protocol/__init__.py                   |   17 +
 distributed/protocol/compression.py                |  114 +
 distributed/protocol/core.py                       |  172 +
 distributed/protocol/h5py.py                       |   40 +
 distributed/protocol/netcdf4.py                    |   63 +
 distributed/protocol/numpy.py                      |   87 +
 distributed/protocol/pickle.py                     |   54 +
 distributed/protocol/serialize.py                  |  248 ++
 distributed/protocol/tests/test_h5py.py            |   81 +
 distributed/protocol/tests/test_netcdf4.py         |   94 +
 distributed/protocol/tests/test_numpy.py           |  115 +
 distributed/protocol/tests/test_pickle.py          |   30 +
 distributed/protocol/tests/test_protocol.py        |  173 +
 distributed/protocol/tests/test_protocol_utils.py  |   13 +
 distributed/protocol/tests/test_serialize.py       |  127 +
 distributed/protocol/utils.py                      |   73 +
 distributed/scheduler.py                           | 3023 ++++++++++++++++
 distributed/sizeof.py                              |   53 +
 distributed/submit.py                              |   87 +
 distributed/sync.py                                |   80 +
 distributed/tests/mytestegg-1.0.0-py3.4.egg        |  Bin 0 -> 1823 bytes
 distributed/tests/test_batched.py                  |  282 ++
 distributed/tests/test_client.py                   | 3790 ++++++++++++++++++++
 distributed/tests/test_collections.py              |  361 ++
 distributed/tests/test_compatibility.py            |    9 +
 distributed/tests/test_core.py                     |  266 ++
 distributed/tests/test_hdfs.py                     |  393 ++
 distributed/tests/test_ipython.py                  |  145 +
 distributed/tests/test_joblib.py                   |   61 +
 distributed/tests/test_nanny.py                    |  139 +
 distributed/tests/test_scheduler.py                | 1059 ++++++
 distributed/tests/test_sizeof.py                   |   36 +
 distributed/tests/test_steal.py                    |  130 +
 distributed/tests/test_stress.py                   |  158 +
 distributed/tests/test_submit_cli.py               |   52 +
 distributed/tests/test_submit_remote_client.py     |   58 +
 distributed/tests/test_sync.py                     |   12 +
 distributed/tests/test_threadpoolexecutor.py       |   26 +
 distributed/tests/test_utils.py                    |  284 ++
 distributed/tests/test_utils_comm.py               |   29 +
 distributed/tests/test_utils_test.py               |   34 +
 distributed/tests/test_worker.py                   |  577 +++
 distributed/tests/test_worker_client.py            |   88 +
 distributed/tests/test_worker_failure.py           |  325 ++
 distributed/threadpoolexecutor.py                  |  116 +
 distributed/utils.py                               |  531 +++
 distributed/utils_comm.py                          |  246 ++
 distributed/utils_test.py                          |  458 +++
 distributed/versions.py                            |   63 +
 distributed/worker.py                              |  950 +++++
 distributed/worker_client.py                       |  168 +
 docs/Makefile                                      |  192 +
 docs/make.bat                                      |  263 ++
 docs/requirements.txt                              |    5 +
 docs/source/adaptive.rst                           |  143 +
 docs/source/api.rst                                |   94 +
 docs/source/client.rst                             |  181 +
 docs/source/conf.py                                |  369 ++
 docs/source/develop.rst                            |  142 +
 docs/source/ec2.rst                                |   70 +
 docs/source/efficiency.rst                         |  109 +
 docs/source/examples-overview.rst                  |    7 +
 docs/source/examples/word-count.rst                |  284 ++
 docs/source/faq.rst                                |   41 +
 docs/source/foundations.rst                        |  166 +
 docs/source/images/network.png                     |  Bin 0 -> 161762 bytes
 docs/source/images/network.svg                     |  841 +++++
 docs/source/images/scheduler-detailed.png          |  Bin 0 -> 233778 bytes
 docs/source/images/scheduler-detailed.svg          | 1382 +++++++
 docs/source/images/task-state.dot                  |   21 +
 docs/source/images/task-state.svg                  |  133 +
 docs/source/index.rst                              |  124 +
 docs/source/install.rst                            |   47 +
 docs/source/ipython.rst                            |   84 +
 docs/source/joblib.rst                             |   55 +
 docs/source/journey.rst                            |  187 +
 docs/source/local-cluster.rst                      |   38 +
 docs/source/locality.rst                           |  199 +
 docs/source/manage-computation.rst                 |  181 +
 docs/source/memory.rst                             |  183 +
 docs/source/plugins.rst                            |    5 +
 docs/source/protocol.rst                           |  224 ++
 docs/source/publish.rst                            |   98 +
 docs/source/queues.rst                             |  194 +
 docs/source/quickstart.rst                         |  117 +
 docs/source/related-work.rst                       |  216 ++
 docs/source/resilience.rst                         |   84 +
 docs/source/scheduling-policies.rst                |  110 +
 docs/source/scheduling-state.rst                   |  390 ++
 docs/source/serialization.rst                      |   49 +
 docs/source/setup.rst                              |  204 ++
 docs/source/submitting-applications.rst            |   52 +
 docs/source/task-launch.rst                        |  168 +
 docs/source/web.rst                                |  183 +
 docs/source/work-stealing.rst                      |  109 +
 docs/source/worker.rst                             |  144 +
 release-notes.md                                   |  132 +
 requirements.txt                                   |   11 +
 setup.py                                           |   49 +
 194 files changed, 34007 insertions(+)

diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..8038e8d
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,20 @@
+[run]
+include = 
+    distributed/*
+omit =
+    distributed/tests/test*
+    distributed/hdfs.py
+    distributed/cluster.py
+    distributed/*/tests/test*
+    distributed/compatibility.py
+    distributed/cli/utils.py
+    distributed/utils_test.py
+    distributed/deploy/ssh.py
+    distributed/_ipython_utils.py
+
+[report]
+show_missing = True
+
+[html]
+directory = coverage_html_report
+
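
For reference, coverage.py picks this file up automatically from the working
directory; a minimal sketch of how the sections above take effect, using the
public coverage API (not part of this import):

```python
import coverage

cov = coverage.Coverage()   # reads .coveragerc: [run] include/omit apply here
cov.start()
# ... import and exercise code under distributed/ (e.g. run the test suite) ...
cov.stop()
cov.save()
cov.report()                # show_missing=True comes from the [report] section
cov.html_report()           # writes coverage_html_report/ per the [html] section
```
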
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..81cc238
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+*.py~
+docs/build
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..5cb3e10
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,80 @@
+language: python
+sudo: false
+services:
+  - docker
+
+matrix:
+  fast_finish: true
+  include:
+    - python: "2.7"
+    - python: "3.4"
+    - python: "3.5"
+      env: HDFS=false
+    - python: "2.7"
+      env: HDFS=true
+      sudo: true
+      dist: trusty
+    - python: "3.5"
+      env: HDFS=true
+      sudo: true
+      dist: trusty
+
+before_install:
+  - |
+    if [[ $HDFS == true ]]; then
+        if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
+          export DOCKER_ENV=/opt/conda;
+        else
+          export DOCKER_ENV=/opt/conda/envs/py3;
+        fi
+        echo $DOCKER_ENV
+        pwd
+        pushd continuous_integration
+        docker build -t distributed-hdfs .
+        popd
+        docker run -d -p 8020:8020 -p 50070:50070 -v $(pwd):/distributed distributed-hdfs
+        export CONTAINER_ID=$(docker ps -l -q)
+        sleep 60  # Wait for namenode and datanode
+    fi;
+
+install:
+  # Install conda
+  - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
+  - bash miniconda.sh -b -p $HOME/miniconda
+  - export PATH="$HOME/miniconda/bin:$PATH"
+  - conda config --set always_yes yes --set changeps1 no
+  - conda update conda
+
+  # Install dependencies
+  - conda create -n test-environment python=$TRAVIS_PYTHON_VERSION
+  - source activate test-environment
+  - conda install pytest coverage tornado toolz dill futures dask ipywidgets psutil bokeh requests joblib mock ipykernel jupyter_client h5py netcdf4
+  - pip install git+https://github.com/dask/dask.git --upgrade
+  - pip install git+https://github.com/joblib/joblib.git --upgrade
+  - pip install git+https://github.com/dask/s3fs.git --upgrade
+
+  # Install distributed
+  - python setup.py install
+
+  - |
+    if [[ $HDFS == true ]]; then
+        pwd
+        docker exec -it $CONTAINER_ID $DOCKER_ENV/bin/python setup.py install
+    fi;
+
+script:
+    - |
+      if [[ $HDFS == true ]]; then
+        pwd
+        docker exec -it $CONTAINER_ID $DOCKER_ENV/bin/py.test distributed/tests/test_hdfs.py --verbose
+      elif [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then
+        coverage run $(which py.test) distributed -m "not avoid_travis" --verbose;
+      else
+        py.test -m "not avoid_travis" distributed --verbose;
+      fi;
+
+after_success:
+    - if [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then coverage report; pip install coveralls ; coveralls ; fi
+
+notifications:
+  email: false
diff --git a/AUTHORS.md b/AUTHORS.md
new file mode 100644
index 0000000..66d2ce2
--- /dev/null
+++ b/AUTHORS.md
@@ -0,0 +1,3 @@
+[Matthew Rocklin](http://matthewrocklin.com)
+
+[Michael Broxton](http://graphics.stanford.edu/~broxton/)
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..71ddeb2
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,28 @@
+Copyright (c) 2015-2016, Continuum Analytics, Inc. and contributors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+Neither the name of Continuum Analytics nor the names of any contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..0b8c1b2
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,13 @@
+recursive-include distributed *.py
+recursive-include distributed *.js
+recursive-include distributed *.coffee
+recursive-include docs *.rst
+
+include setup.py
+include README.rst
+include LICENSE.txt
+include MANIFEST.in
+include requirements.txt
+include distributed/config.yaml
+
+prune docs/_build
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..2d2d285
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,6 @@
+Distributed
+===========
+
+A library for distributed computation.  See documentation_ for more details.
+
+.. _documentation: https://distributed.readthedocs.io/en/latest
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 0000000..b5972ff
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,5 @@
+# https://pytest.org/latest/example/simple.html#control-skipping-of-tests-according-to-command-line-option
+import pytest
+def pytest_addoption(parser):
+    parser.addoption("--runslow", action="store_true", help="run slow tests")
+
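
The conftest above only registers the flag; the pytest documentation linked in
its comment pairs `--runslow` with a collection hook that skips `slow`-marked
tests unless the flag is given. A sketch of that companion hook (not part of
this import):

```python
import pytest

def pytest_collection_modifyitems(config, items):
    if config.getoption("--runslow"):
        return  # --runslow given: run slow tests too
    skip_slow = pytest.mark.skip(reason="need --runslow option to run")
    for item in items:
        if "slow" in item.keywords:
            item.add_marker(skip_slow)
```
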
diff --git a/continuous_integration/Dockerfile b/continuous_integration/Dockerfile
new file mode 100644
index 0000000..6d0bef4
--- /dev/null
+++ b/continuous_integration/Dockerfile
@@ -0,0 +1,43 @@
+FROM ubuntu:trusty
+
+# conda
+RUN apt-get update && apt-get install -y -q curl bzip2 git
+RUN curl https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -o /tmp/miniconda.sh
+RUN /bin/bash /tmp/miniconda.sh -b -p /opt/conda
+RUN rm /tmp/miniconda.sh
+ENV PATH /opt/conda/bin:$PATH
+
+# hdfs3 - python 2
+RUN apt-get install -y -q protobuf-compiler libprotobuf-dev
+ENV LIBHDFS3_CONF /etc/hadoop/conf/hdfs-site.xml
+RUN /opt/conda/bin/conda install -y -q libxml2 krb5 boost ipython pytest pip pandas cython mock
+RUN /opt/conda/bin/conda install -y -q libhdfs3 libgsasl libntlm -c dask
+RUN /opt/conda/bin/pip install git+https://github.com/dask/hdfs3 --upgrade
+RUN /opt/conda/bin/pip install git+https://github.com/dask/s3fs --upgrade
+RUN /opt/conda/bin/pip install git+https://github.com/blaze/dask --upgrade
+# hdfs3 - python 3
+RUN conda create -n py3 -y python=3
+RUN conda install -n py3 -y -q libxml2 krb5 boost ipython pytest pip pandas cython mock
+RUN conda install -n py3 libhdfs3 libgsasl libntlm -c dask
+RUN /opt/conda/envs/py3/bin/pip install git+https://github.com/dask/hdfs3 --upgrade
+RUN /opt/conda/envs/py3/bin/pip install git+https://github.com/dask/s3fs --upgrade
+RUN /opt/conda/envs/py3/bin/pip install git+https://github.com/blaze/dask --upgrade
+
+# Cloudera repositories
+RUN curl -s http://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh/archive.key | apt-key add -
+RUN echo 'deb [arch=amd64] http://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh trusty-cdh5 contrib' > /etc/apt/sources.list.d/cloudera.list
+RUN echo 'deb-src http://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh trusty-cdh5 contrib' >> /etc/apt/sources.list.d/cloudera.list
+ADD docker-files/cloudera.pref /etc/apt/preferences.d/cloudera.pref
+
+# Install CDH5 in a single node: Pseudo Distributed
+ADD docker-files/cdh5-install.sh /tmp/cdh5-install.sh
+RUN bash /tmp/cdh5-install.sh
+
+EXPOSE 8020
+EXPOSE 50070
+
+VOLUME /distributed
+WORKDIR /distributed
+
+ADD docker-files/start.sh /tmp/start.sh
+CMD ["bash", "/tmp/start.sh"]
diff --git a/continuous_integration/README.md b/continuous_integration/README.md
new file mode 100644
index 0000000..a3c1e4d
--- /dev/null
+++ b/continuous_integration/README.md
@@ -0,0 +1,37 @@
+# Continuous Integration
+
+## Local test
+
+Requirements:
+-  `docker`
+
+Build the container:
+```bash
+docker build -t distributed-hdfs .
+```
+
+Start the container and wait for it to be ready:
+
+```bash
+docker run -it -p 8020:8020 -p 50070:50070 -v $(pwd):/distributed distributed-hdfs
+```
+
+Ports `8020` and `50070` on the host now point into the container, and the source code (as a shared volume) is available inside it under `/distributed`.
+
+Run the following from the root of this project directory to start a bash
+session in the running container:
+
+```bash
+# Get the container ID
+export CONTAINER_ID=$(docker ps -l -q)
+
+# Start the bash session
+docker exec -it $CONTAINER_ID bash
+```
+
+Now that we are in the container, we can install the library and run the tests:
+
+```bash
+python setup.py install
+py.test distributed -s -vv
+```
diff --git a/continuous_integration/docker-files/cdh5-install.sh b/continuous_integration/docker-files/cdh5-install.sh
new file mode 100644
index 0000000..141aa0c
--- /dev/null
+++ b/continuous_integration/docker-files/cdh5-install.sh
@@ -0,0 +1,17 @@
+# Install CDH5 in a single node: Pseudo Distributed
+# Docs: http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/cdh_qs_yarn_pseudo.html
+
+apt-get update && apt-get install -y -q openjdk-7-jre-headless hadoop-conf-pseudo
+
+# Step 1: Format the NameNode
+sudo -u hdfs hdfs namenode -format -force
+
+# Step 2: Start HDFS
+for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x start ; done
+
+# Step 3: Create the directories needed for Hadoop processes
+bash /usr/lib/hadoop/libexec/init-hdfs.sh
+
+# Step 6: Create User Directories
+# sudo -u hdfs hdfs dfs -mkdir -p /user/hadoop
+# sudo -u hdfs hdfs dfs -chown hadoop /user/hadoop
diff --git a/continuous_integration/docker-files/cloudera.pref b/continuous_integration/docker-files/cloudera.pref
new file mode 100644
index 0000000..e5a1eae
--- /dev/null
+++ b/continuous_integration/docker-files/cloudera.pref
@@ -0,0 +1,3 @@
+Package: *
+Pin: release o=Cloudera, l=Cloudera
+Pin-Priority: 501
diff --git a/continuous_integration/docker-files/libhdfs-build.sh b/continuous_integration/docker-files/libhdfs-build.sh
new file mode 100644
index 0000000..6de7a8a
--- /dev/null
+++ b/continuous_integration/docker-files/libhdfs-build.sh
@@ -0,0 +1,8 @@
+cd /opt/libhdfs3
+
+mkdir build
+pushd build
+../bootstrap --prefix=/usr/local
+make
+make install
+popd
diff --git a/continuous_integration/docker-files/start.sh b/continuous_integration/docker-files/start.sh
new file mode 100644
index 0000000..5cd1a04
--- /dev/null
+++ b/continuous_integration/docker-files/start.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Start HDFS
+sudo service hadoop-hdfs-namenode start
+sudo service hadoop-hdfs-datanode start
+
+echo "Ready"
+
+# Block
+sleep infinity
diff --git a/distributed/__init__.py b/distributed/__init__.py
new file mode 100644
index 0000000..8edad9c
--- /dev/null
+++ b/distributed/__init__.py
@@ -0,0 +1,21 @@
+from __future__ import print_function, division, absolute_import
+
+from .config import config
+from .core import connect, read, write, rpc
+from .deploy import LocalCluster
+from .diagnostics import progress
+from .client import (Client, Executor, CompatibleExecutor, wait, as_completed,
+        default_client)
+from .nanny import Nanny
+from .scheduler import Scheduler
+from .utils import sync
+from .worker import Worker
+from .worker_client import local_client
+
+try:
+    from .collections import futures_to_collection
+except ImportError:
+    pass
+
+
+__version__ = '1.14.3'
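
These re-exports define the user-facing API; a minimal usage sketch against an
in-process cluster (class names as imported above; the `scheduler_address`
attribute and behavior are assumed from this 1.14.3 release):

```python
from distributed import Client, LocalCluster

cluster = LocalCluster()                 # starts a scheduler and workers locally
client = Client(cluster.scheduler_address)
future = client.submit(sum, [1, 2, 3])   # schedule a call on a worker
assert future.result() == 6              # block until the result arrives
```
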
diff --git a/distributed/_ipython_utils.py b/distributed/_ipython_utils.py
new file mode 100644
index 0000000..112e472
--- /dev/null
+++ b/distributed/_ipython_utils.py
@@ -0,0 +1,246 @@
+"""Utilities for integrating with IPython
+
+These functions should probably reside in Jupyter and IPython repositories,
+after which we can import them instead of having our own definitions.
+"""
+
+from __future__ import print_function
+
+import atexit
+import os
+try:
+    import queue
+except ImportError:
+    # Python 2
+    import Queue as queue
+from subprocess import Popen
+import sys
+from threading import Thread
+from uuid import uuid4
+
+from tornado.gen import TimeoutError
+from tornado.ioloop import IOLoop
+from threading import Event
+
+from IPython import get_ipython
+from jupyter_client import BlockingKernelClient, write_connection_file
+from jupyter_core.paths import jupyter_runtime_dir
+
+
+OUTPUT_TIMEOUT = 10
+
+
+def run_cell_remote(ip, kc, cell):
+    """Run a cell on a KernelClient
+
+    Any output from the cell will be redisplayed in the local session.
+    """
+    msg_id = kc.execute(cell)
+
+    in_kernel = getattr(ip, 'kernel', False)
+    if in_kernel:
+        socket = ip.display_pub.pub_socket
+        session = ip.display_pub.session
+        parent_header = ip.display_pub.parent_header
+
+    while True:
+        try:
+            msg = kc.get_iopub_msg(timeout=OUTPUT_TIMEOUT)
+        except queue.Empty:
+            raise TimeoutError("Timeout waiting for IPython output")
+
+        if msg['parent_header'].get('msg_id') != msg_id:
+            continue
+        msg_type = msg['header']['msg_type']
+        content = msg['content']
+        if msg_type == 'status':
+            if content['execution_state'] == 'idle':
+                # idle means output is done
+                break
+        elif msg_type == 'stream':
+            stream = getattr(sys, content['name'])
+            stream.write(content['text'])
+        elif msg_type in ('display_data', 'execute_result', 'error'):
+            if in_kernel:
+                session.send(socket, msg_type, content, parent=parent_header)
+            else:
+                if msg_type == 'error':
+                    print('\n'.join(content['traceback']), file=sys.stderr)
+                else:
+                    sys.stdout.write(content['data'].get('text/plain', ''))
+        else:
+            pass
+
+
+def register_worker_magic(connection_info, magic_name='worker'):
+    """Register a %worker magic, given connection_info.
+
+    Both a line and cell magic are registered,
+    which run the given cell in a remote kernel.
+    """
+    ip = get_ipython()
+    info = dict(connection_info) # copy
+    key = info.pop('key')
+    kc = BlockingKernelClient(**info)  # use the copy, which no longer has 'key'
+    kc.session.key = key
+    kc.start_channels()
+    def remote(line, cell=None):
+        """Run the current cell on a remote IPython kernel"""
+        if cell is None:
+            # both line and cell magic
+            cell = line
+        run_cell_remote(ip, kc, cell)
+    remote.client = kc # preserve reference on kc, largely for mocking
+    ip.register_magic_function(remote, magic_kind='line', magic_name=magic_name)
+    ip.register_magic_function(remote, magic_kind='cell', magic_name=magic_name)
+
+
+def remote_magic(line, cell=None):
+    """A magic for running code on a specified remote worker
+
+    The worker's connection_info dict is looked up in the IPython user
+    namespace under the name given as the first positional argument.
+    The rest of the line (or the entire cell for a %%cell magic)
+    will be passed to the remote kernel.
+
+    Usage:
+
+        info = e.start_ipython(worker)[worker]
+        %remote info print(worker.data)
+    """
+    # get connection info from IPython's user namespace
+    ip = get_ipython()
+    split_line = line.split(None, 1)
+    info_name = split_line[0]
+    if info_name not in ip.user_ns:
+        raise NameError(info_name)
+    connection_info = dict(ip.user_ns[info_name])
+
+    if not cell: # line magic, use the rest of the line
+        if len(split_line) == 1:
+            raise ValueError("I need some code to run!")
+        cell = split_line[1]
+
+    # turn info dict to hashable str for use as lookup key in _clients cache
+    key = ','.join(map(str, sorted(connection_info.items())))
+    session_key = connection_info.pop('key')
+
+    if key in remote_magic._clients:
+        kc = remote_magic._clients[key]
+    else:
+        kc = BlockingKernelClient(**connection_info)
+        kc.session.key = session_key
+        kc.start_channels()
+        kc.wait_for_ready(timeout=10)
+        remote_magic._clients[key] = kc
+
+    # actually run the code
+    run_cell_remote(ip, kc, cell)
+
+# cache clients for re-use in remote magic
+remote_magic._clients = {}
+
+
+def register_remote_magic(magic_name='remote'):
+    """Define the parameterized %remote magic
+
+    See remote_magic above for details.
+    """
+    ip = get_ipython()
+    if ip is None:
+        return # do nothing if IPython's not running
+    ip.register_magic_function(remote_magic, magic_kind='line', magic_name=magic_name)
+    ip.register_magic_function(remote_magic, magic_kind='cell', magic_name=magic_name)
+
+
+def connect_qtconsole(connection_info, name=None, extra_args=None):
+    """Open a QtConsole connected to a worker who has the given future
+
+    - identify worker with who_has
+    - start IPython kernel on the worker
+    - start qtconsole connected to the kernel
+    """
+    runtime_dir = jupyter_runtime_dir()
+    if name is None:
+        name = uuid4().hex
+
+    path = os.path.join(runtime_dir, name + '.json')
+    write_connection_file(path, **connection_info)
+    cmd = ['jupyter', 'qtconsole', '--existing', path]
+    if extra_args:
+        cmd.extend(extra_args)
+    Popen(cmd)
+    def _cleanup_connection_file():
+        """Cleanup our connection file when we exit."""
+        try:
+            os.remove(path)
+        except OSError:
+            pass
+    atexit.register(_cleanup_connection_file)
+
+
+def start_ipython(ip=None, ns=None, log=None):
+    """Start an IPython kernel in a thread
+
+    Parameters
+    ----------
+
+    ip: str
+        The IP address to listen on (likely the parent object's ip).
+    ns: dict
+        Any names that should be injected into the IPython namespace.
+    log: logger instance
+        Hook up IPython's logging to an existing logger instead of the default.
+    """
+    from IPython import get_ipython
+    if get_ipython() is not None:
+        raise RuntimeError("Cannot start IPython, it's already running.")
+
+    from zmq.eventloop.ioloop import ZMQIOLoop
+    from ipykernel.kernelapp import IPKernelApp
+    # save the global IOLoop instance
+    # since IPython relies on it, but we are going to put it in a thread.
+    save_inst = IOLoop.instance()
+    IOLoop.clear_instance()
+    zmq_loop = ZMQIOLoop()
+    zmq_loop.install()
+
+    # start IPython, disabling its signal handlers that won't work due to running in a thread:
+    app = IPKernelApp.instance(log=log)
+    # Don't connect to the history database
+    app.config.HistoryManager.hist_file = ':memory:'
+    # listen on all interfaces, so remote clients can connect:
+    if ip:
+        app.ip = ip
+    # disable some signal handling, logging
+    noop = lambda : None
+    app.init_signal = noop
+    app.log_connection_info = noop
+
+    # start IPython in a thread
+    # initialization happens in the thread to avoid threading problems
+    # with the sqlite history
+    evt = Event()
+    def _start():
+        app.initialize([])
+        app.kernel.pre_handler_hook = noop
+        app.kernel.post_handler_hook = noop
+        app.kernel.start()
+        app.kernel.loop = IOLoop.instance()
+        # save self in the IPython namespace as 'worker'
+        # inject things into the IPython namespace
+        if ns:
+            app.kernel.shell.user_ns.update(ns)
+        evt.set()
+        zmq_loop.start()
+
+    zmq_loop_thread = Thread(target=_start)
+    zmq_loop_thread.daemon = True
+    zmq_loop_thread.start()
+    assert evt.wait(timeout=5), "IPython didn't start in a reasonable amount of time."
+
+    # put the global IOLoop instance back:
+    IOLoop.clear_instance()
+    save_inst.install()
+    return app
+
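
Tying these helpers together, a hedged sketch of the intended workflow (per the
remote_magic docstring above, `e` is a client/executor whose `start_ipython`
call returns connection_info dicts keyed by worker; both names are assumed
here):

```python
from distributed._ipython_utils import (register_remote_magic,
                                        register_worker_magic)

register_remote_magic()                  # enables %remote / %%remote

info = e.start_ipython(worker)[worker]   # assumed, as in the docstring above
register_worker_magic(info)              # now %worker runs code on that kernel
# %worker print("running on the worker")
```
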
diff --git a/distributed/batched.py b/distributed/batched.py
new file mode 100644
index 0000000..2aeabf6
--- /dev/null
+++ b/distributed/batched.py
@@ -0,0 +1,214 @@
+from __future__ import print_function, division, absolute_import
+
+from datetime import timedelta
+import logging
+from timeit import default_timer
+
+from tornado import gen
+from tornado.queues import Queue
+from tornado.iostream import StreamClosedError
+from tornado.ioloop import PeriodicCallback, IOLoop
+
+from .core import read, write
+from .utils import log_errors
+
+
+logger = logging.getLogger(__name__)
+
+
+class BatchedSend(object):
+    """ Batch messages in batches on a stream
+
+    This takes an IOStream and an interval (in ms) and ensures that we send no
+    more than one message every interval milliseconds.  We send lists of
+    messages.
+
+    Example
+    -------
+    >>> stream = yield connect(ip, port)
+    >>> bstream = BatchedSend(interval=10)  # 10 ms
+    >>> bstream.start(stream)
+    >>> bstream.send('Hello,')
+    >>> bstream.send('world!')
+
+    On the other side, the recipient will get a message like the following::
+
+        ['Hello,', 'world!']
+    """
+    def __init__(self, interval, loop=None):
+        self.loop = loop or IOLoop.current()
+        self.interval = interval / 1000.
+        self.last_transmission = 0
+        self.buffer = []
+        self.stream = None
+        self.last_payload = []
+        self.last_send = gen.sleep(0)
+        self.message_count = 0
+        self.batch_count = 0
+
+    def start(self, stream):
+        self.stream = stream
+        if self.buffer:
+            self.send_next()
+
+    def __str__(self):
+        return '<BatchedSend: %d in buffer>' % len(self.buffer)
+
+    __repr__ = __str__
+
+    @gen.coroutine
+    def send_next(self, wait=True):
+        try:
+            now = default_timer()
+            if wait:
+                wait_time = min(self.last_transmission + self.interval - now,
+                                self.interval)
+                yield gen.sleep(wait_time)
+            yield self.last_send
+            self.buffer, payload = [], self.buffer
+            self.last_payload = payload
+            self.last_transmission = now
+            self.batch_count += 1
+            self.last_send = write(self.stream, payload)
+        except Exception as e:
+            logger.exception(e)
+            raise
+
+    @gen.coroutine
+    def _write(self, payload):
+        yield gen.sleep(0)
+        yield write(self.stream, payload)
+
+    def send(self, msg):
+        """ Send a message to the other side
+
+        This completes quickly and synchronously
+        """
+        try:
+            self.message_count += 1
+            if self.stream is None:  # not yet started
+                self.buffer.append(msg)
+                return
+
+            if self.stream._closed:
+                raise StreamClosedError()
+
+            if self.buffer:
+                self.buffer.append(msg)
+                return
+
+            # If we sent recently, or the last send is still in flight,
+            # buffer this message and schedule a delayed batch.
+            now = default_timer()
+            if (now < self.last_transmission + self.interval
+                or not self.last_send._done):
+                self.buffer.append(msg)
+                self.loop.add_callback(self.send_next)
+                return
+
+            self.buffer.append(msg)
+            self.loop.add_callback(self.send_next, wait=False)
+        except StreamClosedError:
+            raise
+        except Exception as e:
+            logger.exception(e)
+
+    @gen.coroutine
+    def close(self, ignore_closed=False):
+        """ Flush existing messages and then close stream """
+        try:
+            if self.stream._write_buffer:
+                yield self.last_send
+            if self.buffer:
+                self.buffer, payload = [], self.buffer
+                yield write(self.stream, payload)
+        except StreamClosedError:
+            if not ignore_closed:
+                raise
+        self.stream.close()
+
+
+class BatchedStream(object):
+    """ Mostly obsolete, see BatchedSend """
+    def __init__(self, stream, interval):
+        self.stream = stream
+        self.interval = interval / 1000.
+        self.last_transmission = default_timer()
+        self.send_q = Queue()
+        self.recv_q = Queue()
+        self._background_send_coroutine = self._background_send()
+        self._background_recv_coroutine = self._background_recv()
+        self._broken = None
+
+        self.pc = PeriodicCallback(lambda: None, 100)
+        self.pc.start()
+
+    @gen.coroutine
+    def _background_send(self):
+        with log_errors():
+            while True:
+                msg = yield self.send_q.get()
+                if msg == 'close':
+                    break
+                msgs = [msg]
+                now = default_timer()
... 34349 lines suppressed ...
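
For completeness, a minimal coroutine sketch of the BatchedSend contract shown
in its docstring (addresses assumed; `connect` is the helper re-exported from
distributed.core in this release):

```python
from tornado import gen
from distributed.batched import BatchedSend
from distributed.core import connect

@gen.coroutine
def demo(ip, port):
    stream = yield connect(ip, port)    # tornado IOStream to the peer
    bstream = BatchedSend(interval=10)  # coalesce writes per 10 ms window
    bstream.start(stream)
    bstream.send('Hello,')              # send() returns immediately; both
    bstream.send('world!')              # messages arrive as one list
    yield bstream.close()               # flush anything still buffered
```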

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/dask.distributed.git


