[Python-modules-commits] [dask.distributed] 01/03: Import dask.distributed_1.14.3+ds.1.orig.tar.gz
Diane Trout
diane at moszumanska.debian.org
Wed Jan 11 06:22:08 UTC 2017
This is an automated email from the git hooks/post-receive script.
diane pushed a commit to branch master
in repository dask.distributed.
commit a3e43ddc7b895651f3efad739586f7b1d326f429
Author: Diane Trout <diane at ghic.org>
Date: Tue Jan 10 21:37:49 2017 -0800
Import dask.distributed_1.14.3+ds.1.orig.tar.gz
---
.coveragerc | 20 +
.gitignore | 3 +
.travis.yml | 80 +
AUTHORS.md | 3 +
LICENSE.txt | 28 +
MANIFEST.in | 13 +
README.rst | 6 +
conftest.py | 5 +
continuous_integration/Dockerfile | 43 +
continuous_integration/README.md | 37 +
.../docker-files/cdh5-install.sh | 17 +
continuous_integration/docker-files/cloudera.pref | 3 +
.../docker-files/libhdfs-build.sh | 8 +
continuous_integration/docker-files/start.sh | 10 +
distributed/__init__.py | 21 +
distributed/_ipython_utils.py | 246 ++
distributed/batched.py | 214 ++
distributed/bokeh/__init__.py | 35 +
distributed/bokeh/application.py | 114 +
distributed/bokeh/background/__init__.py | 0
distributed/bokeh/background/main.py | 0
distributed/bokeh/background/server_lifecycle.py | 147 +
distributed/bokeh/components.py | 558 +++
distributed/bokeh/export_tool.coffee | 44 +
distributed/bokeh/export_tool.py | 33 +
distributed/bokeh/memory-usage.py | 12 +
distributed/bokeh/processing-stacks.py | 12 +
distributed/bokeh/resource-profiles.py | 13 +
distributed/bokeh/status/__init__.py | 0
distributed/bokeh/status/main.py | 39 +
distributed/bokeh/status/templates/index.html | 74 +
distributed/bokeh/task-progress.py | 12 +
distributed/bokeh/task-stream.py | 13 +
distributed/bokeh/tasks/__init__.py | 0
distributed/bokeh/tasks/main.py | 19 +
distributed/bokeh/tests/test_application.py | 47 +
distributed/bokeh/tests/test_components.py | 41 +
distributed/bokeh/tests/test_worker_monitor.py | 60 +
distributed/bokeh/utils.py | 11 +
distributed/bokeh/worker-table.py | 13 +
distributed/bokeh/worker_monitor.py | 106 +
distributed/bokeh/workers/__init__.py | 0
distributed/bokeh/workers/main.py | 29 +
distributed/bokeh/workers/templates/index.html | 74 +
distributed/cli/__init__.py | 0
distributed/cli/dask_remote.py | 23 +
distributed/cli/dask_scheduler.py | 105 +
distributed/cli/dask_ssh.py | 78 +
distributed/cli/dask_submit.py | 31 +
distributed/cli/dask_worker.py | 193 +
distributed/cli/dcluster.py | 12 +
distributed/cli/dscheduler.py | 12 +
distributed/cli/dworker.py | 12 +
distributed/cli/tests/test_dask_remote.py | 9 +
distributed/cli/tests/test_dask_scheduler.py | 172 +
distributed/cli/tests/test_dask_submit.py | 9 +
distributed/cli/tests/test_dworker.py | 38 +
distributed/cli/utils.py | 35 +
distributed/client.py | 2313 ++++++++++++
distributed/collections.py | 257 ++
distributed/compatibility.py | 70 +
distributed/config.py | 35 +
distributed/config.yaml | 13 +
distributed/core.py | 663 ++++
distributed/deploy/__init__.py | 6 +
distributed/deploy/adaptive.py | 89 +
distributed/deploy/local.py | 266 ++
distributed/deploy/ssh.py | 292 ++
distributed/deploy/tests/test_adaptive.py | 53 +
distributed/deploy/tests/test_local.py | 213 ++
distributed/deploy/tests/test_ssh.py | 29 +
distributed/deploy/utils_test.py | 41 +
distributed/diagnostics/__init__.py | 9 +
distributed/diagnostics/eventstream.py | 73 +
distributed/diagnostics/plugin.py | 52 +
distributed/diagnostics/progress.py | 293 ++
distributed/diagnostics/progress_stream.py | 213 ++
distributed/diagnostics/progressbar.py | 325 ++
distributed/diagnostics/scheduler.py | 76 +
distributed/diagnostics/tests/test_eventstream.py | 51 +
distributed/diagnostics/tests/test_plugin.py | 33 +
distributed/diagnostics/tests/test_progress.py | 203 ++
.../diagnostics/tests/test_progress_stream.py | 130 +
distributed/diagnostics/tests/test_progressbar.py | 92 +
.../tests/test_scheduler_diagnostics.py | 54 +
distributed/diagnostics/tests/test_widgets.py | 240 ++
distributed/hdfs.py | 139 +
distributed/http/__init__.py | 4 +
distributed/http/core.py | 85 +
distributed/http/scheduler.py | 112 +
distributed/http/tests/test_scheduler_http.py | 192 +
distributed/http/tests/test_worker_http.py | 93 +
distributed/http/worker.py | 67 +
distributed/joblib.py | 84 +
distributed/nanny.py | 385 ++
distributed/protocol/__init__.py | 17 +
distributed/protocol/compression.py | 114 +
distributed/protocol/core.py | 172 +
distributed/protocol/h5py.py | 40 +
distributed/protocol/netcdf4.py | 63 +
distributed/protocol/numpy.py | 87 +
distributed/protocol/pickle.py | 54 +
distributed/protocol/serialize.py | 248 ++
distributed/protocol/tests/test_h5py.py | 81 +
distributed/protocol/tests/test_netcdf4.py | 94 +
distributed/protocol/tests/test_numpy.py | 115 +
distributed/protocol/tests/test_pickle.py | 30 +
distributed/protocol/tests/test_protocol.py | 173 +
distributed/protocol/tests/test_protocol_utils.py | 13 +
distributed/protocol/tests/test_serialize.py | 127 +
distributed/protocol/utils.py | 73 +
distributed/scheduler.py | 3023 ++++++++++++++++
distributed/sizeof.py | 53 +
distributed/submit.py | 87 +
distributed/sync.py | 80 +
distributed/tests/mytestegg-1.0.0-py3.4.egg | Bin 0 -> 1823 bytes
distributed/tests/test_batched.py | 282 ++
distributed/tests/test_client.py | 3790 ++++++++++++++++++++
distributed/tests/test_collections.py | 361 ++
distributed/tests/test_compatibility.py | 9 +
distributed/tests/test_core.py | 266 ++
distributed/tests/test_hdfs.py | 393 ++
distributed/tests/test_ipython.py | 145 +
distributed/tests/test_joblib.py | 61 +
distributed/tests/test_nanny.py | 139 +
distributed/tests/test_scheduler.py | 1059 ++++++
distributed/tests/test_sizeof.py | 36 +
distributed/tests/test_steal.py | 130 +
distributed/tests/test_stress.py | 158 +
distributed/tests/test_submit_cli.py | 52 +
distributed/tests/test_submit_remote_client.py | 58 +
distributed/tests/test_sync.py | 12 +
distributed/tests/test_threadpoolexecutor.py | 26 +
distributed/tests/test_utils.py | 284 ++
distributed/tests/test_utils_comm.py | 29 +
distributed/tests/test_utils_test.py | 34 +
distributed/tests/test_worker.py | 577 +++
distributed/tests/test_worker_client.py | 88 +
distributed/tests/test_worker_failure.py | 325 ++
distributed/threadpoolexecutor.py | 116 +
distributed/utils.py | 531 +++
distributed/utils_comm.py | 246 ++
distributed/utils_test.py | 458 +++
distributed/versions.py | 63 +
distributed/worker.py | 950 +++++
distributed/worker_client.py | 168 +
docs/Makefile | 192 +
docs/make.bat | 263 ++
docs/requirements.txt | 5 +
docs/source/adaptive.rst | 143 +
docs/source/api.rst | 94 +
docs/source/client.rst | 181 +
docs/source/conf.py | 369 ++
docs/source/develop.rst | 142 +
docs/source/ec2.rst | 70 +
docs/source/efficiency.rst | 109 +
docs/source/examples-overview.rst | 7 +
docs/source/examples/word-count.rst | 284 ++
docs/source/faq.rst | 41 +
docs/source/foundations.rst | 166 +
docs/source/images/network.png | Bin 0 -> 161762 bytes
docs/source/images/network.svg | 841 +++++
docs/source/images/scheduler-detailed.png | Bin 0 -> 233778 bytes
docs/source/images/scheduler-detailed.svg | 1382 +++++++
docs/source/images/task-state.dot | 21 +
docs/source/images/task-state.svg | 133 +
docs/source/index.rst | 124 +
docs/source/install.rst | 47 +
docs/source/ipython.rst | 84 +
docs/source/joblib.rst | 55 +
docs/source/journey.rst | 187 +
docs/source/local-cluster.rst | 38 +
docs/source/locality.rst | 199 +
docs/source/manage-computation.rst | 181 +
docs/source/memory.rst | 183 +
docs/source/plugins.rst | 5 +
docs/source/protocol.rst | 224 ++
docs/source/publish.rst | 98 +
docs/source/queues.rst | 194 +
docs/source/quickstart.rst | 117 +
docs/source/related-work.rst | 216 ++
docs/source/resilience.rst | 84 +
docs/source/scheduling-policies.rst | 110 +
docs/source/scheduling-state.rst | 390 ++
docs/source/serialization.rst | 49 +
docs/source/setup.rst | 204 ++
docs/source/submitting-applications.rst | 52 +
docs/source/task-launch.rst | 168 +
docs/source/web.rst | 183 +
docs/source/work-stealing.rst | 109 +
docs/source/worker.rst | 144 +
release-notes.md | 132 +
requirements.txt | 11 +
setup.py | 49 +
194 files changed, 34007 insertions(+)
diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..8038e8d
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,20 @@
+[run]
+include =
+ distributed/*
+omit =
+ distributed/tests/test*
+ distributed/hdfs.py
+ distributed/cluster.py
+ distributed/*/tests/test*
+ distributed/compatibility.py
+ distributed/cli/utils.py
+ distributed/utils_test.py
+ distributed/deploy/ssh.py
+ distributed/_ipython_utils.py
+
+[report]
+show_missing = True
+
+[html]
+directory = coverage_html_report
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..81cc238
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+*.py~
+docs/build
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..5cb3e10
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,80 @@
+language: python
+sudo: false
+services:
+ - docker
+
+matrix:
+ fast_finish: true
+ include:
+ - python: "2.7"
+ - python: "3.4"
+ - python: "3.5"
+ env: HDFS=false
+ - python: "2.7"
+ env: HDFS=true
+ sudo: true
+ dist: trusty
+ - python: "3.5"
+ env: HDFS=true
+ sudo: true
+ dist: trusty
+
+before_install:
+ - |
+ if [[ $HDFS == true ]]; then
+ if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
+ export DOCKER_ENV=/opt/conda;
+ else
+ export DOCKER_ENV=/opt/conda/envs/py3;
+ fi
+ echo $DOCKER_ENV
+ pwd
+ pushd continuous_integration
+ docker build -t distributed-hdfs .
+ popd
+ docker run -d -p 8020:8020 -p 50070:50070 -v $(pwd):/distributed distributed-hdfs
+ export CONTAINER_ID=$(docker ps -l -q)
+ sleep 60 # Wait for namenode and datanode
+ fi;
+
+install:
+ # Install conda
+ - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
+ - bash miniconda.sh -b -p $HOME/miniconda
+ - export PATH="$HOME/miniconda/bin:$PATH"
+ - conda config --set always_yes yes --set changeps1 no
+ - conda update conda
+
+ # Install dependencies
+ - conda create -n test-environment python=$TRAVIS_PYTHON_VERSION
+ - source activate test-environment
+ - conda install pytest coverage tornado toolz dill futures dask ipywidgets psutil bokeh requests joblib mock ipykernel jupyter_client h5py netcdf4
+ - pip install git+https://github.com/dask/dask.git --upgrade
+ - pip install git+https://github.com/joblib/joblib.git --upgrade
+ - pip install git+https://github.com/dask/s3fs.git --upgrade
+
+ # Install distributed
+ - python setup.py install
+
+ - |
+ if [[ $HDFS == true ]]; then
+ pwd
+ docker exec -it $CONTAINER_ID $DOCKER_ENV/bin/python setup.py install
+ fi;
+
+script:
+ - |
+ if [[ $HDFS == true ]]; then
+ pwd
+ docker exec -it $CONTAINER_ID $DOCKER_ENV/bin/py.test distributed/tests/test_hdfs.py --verbose
+ elif [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then
+ coverage run $(which py.test) distributed -m "not avoid_travis" --verbose;
+ else
+ py.test -m "not avoid_travis" distributed --verbose;
+ fi;
+
+after_success:
+ - if [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then coverage report; pip install coveralls ; coveralls ; fi
+
+notifications:
+ email: false
diff --git a/AUTHORS.md b/AUTHORS.md
new file mode 100644
index 0000000..66d2ce2
--- /dev/null
+++ b/AUTHORS.md
@@ -0,0 +1,3 @@
+[Matthew Rocklin](http://matthewrocklin.com)
+
+[Michael Broxton](http://graphics.stanford.edu/~broxton/)
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..71ddeb2
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,28 @@
+Copyright (c) 2015-2016, Continuum Analytics, Inc. and contributors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+Neither the name of Continuum Analytics nor the names of any contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..0b8c1b2
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,13 @@
+recursive-include distributed *.py
+recursive-include distributed *.js
+recursive-include distributed *.coffee
+recursive-include docs *.rst
+
+include setup.py
+include README.rst
+include LICENSE.txt
+include MANIFEST.in
+include requirements.txt
+include distributed/config.yaml
+
+prune docs/_build
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..2d2d285
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,6 @@
+Distributed
+===========
+
+A library for distributed computation. See documentation_ for more details.
+
+.. _documentation: https://distributed.readthedocs.io/en/latest
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 0000000..b5972ff
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,5 @@
+# https://pytest.org/latest/example/simple.html#control-skipping-of-tests-according-to-command-line-option
+import pytest
+def pytest_addoption(parser):
+ parser.addoption("--runslow", action="store_true", help="run slow tests")
+
diff --git a/continuous_integration/Dockerfile b/continuous_integration/Dockerfile
new file mode 100644
index 0000000..6d0bef4
--- /dev/null
+++ b/continuous_integration/Dockerfile
@@ -0,0 +1,43 @@
+FROM ubuntu:trusty
+
+# conda
+RUN apt-get update && apt-get install -y -q curl bzip2 git
+RUN curl https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -o /tmp/miniconda.sh
+RUN /bin/bash /tmp/miniconda.sh -b -p /opt/conda
+RUN rm /tmp/miniconda.sh
+ENV PATH /opt/conda/bin:$PATH
+
+# hdfs3 - python 2
+RUN apt-get install -y -q protobuf-compiler libprotobuf-dev
+ENV LIBHDFS3_CONF /etc/hadoop/conf/hdfs-site.xml
+RUN /opt/conda/bin/conda install -y -q libxml2 krb5 boost ipython pytest pip pandas cython mock
+RUN /opt/conda/bin/conda install -y -q libhdfs3 libgsasl libntlm -c dask
+RUN /opt/conda/bin/pip install git+https://github.com/dask/hdfs3 --upgrade
+RUN /opt/conda/bin/pip install git+https://github.com/dask/s3fs --upgrade
+RUN /opt/conda/bin/pip install git+https://github.com/blaze/dask --upgrade
+# hdfs3 - python 3
+RUN conda create -n py3 -y python=3
+RUN conda install -n py3 -y -q libxml2 krb5 boost ipython pytest pip pandas cython mock
+RUN conda install -n py3 libhdfs3 libgsasl libntlm -c dask
+RUN /opt/conda/envs/py3/bin/pip install git+https://github.com/dask/hdfs3 --upgrade
+RUN /opt/conda/envs/py3/bin/pip install git+https://github.com/dask/s3fs --upgrade
+RUN /opt/conda/envs/py3/bin/pip install git+https://github.com/blaze/dask --upgrade
+
+# Cloudera repositories
+RUN curl -s http://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh/archive.key | apt-key add -
+RUN echo 'deb [arch=amd64] http://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh trusty-cdh5 contrib' > /etc/apt/sources.list.d/cloudera.list
+RUN echo 'deb-src http://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh trusty-cdh5 contrib' >> /etc/apt/sources.list.d/cloudera.list
+ADD docker-files/cloudera.pref /etc/apt/preferences.d/cloudera.pref
+
+# Install CDH5 in a single node: Pseudo Distributed
+ADD docker-files/cdh5-install.sh /tmp/cdh5-install.sh
+RUN bash /tmp/cdh5-install.sh
+
+EXPOSE 8020
+EXPOSE 50070
+
+VOLUME /distributed
+WORKDIR /distributed
+
+ADD docker-files/start.sh /tmp/start.sh
+CMD ["bash", "/tmp/start.sh"]
diff --git a/continuous_integration/README.md b/continuous_integration/README.md
new file mode 100644
index 0000000..a3c1e4d
--- /dev/null
+++ b/continuous_integration/README.md
@@ -0,0 +1,37 @@
+# Continuous Integration
+
+## Local test
+
+Requirements:
+- `docker`
+
+Build the container:
+```bash
+docker build -t distributed-hdfs .
+```
+
+Start the container and wait for it to be ready:
+
+```bash
+docker run -it -p 8020:8020 -p 50070:50070 -v $(pwd):/distributed distributed-hdfs
+```
+
+Now ports `8020` and `50070` on the host point to the container, and the source code (as a shared volume) is available in the container under `/distributed`.
+
+Run the following from the root of this project directory to start a bash
+session in the running container:
+
+```bash
+# Get the container ID
+export CONTAINER_ID=$(docker ps -l -q)
+
+# Start the bash session
+docker exec -it $CONTAINER_ID bash
+```
+
+Now that we are in the container, we can install the library and run the tests:
+
+```bash
+python setup.py install
+py.test distributed -s -vv
+```
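+
+When you are done, you can stop and remove the container. This is a cleanup sketch; it assumes `CONTAINER_ID` is still exported from the step above:
+
+```bash
+# Stop the running HDFS container and delete it
+docker stop $CONTAINER_ID
+docker rm $CONTAINER_ID
+```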
diff --git a/continuous_integration/docker-files/cdh5-install.sh b/continuous_integration/docker-files/cdh5-install.sh
new file mode 100644
index 0000000..141aa0c
--- /dev/null
+++ b/continuous_integration/docker-files/cdh5-install.sh
@@ -0,0 +1,17 @@
+# Install CDH5 in a single node: Pseudo Distributed
+# Docs: http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/cdh_qs_yarn_pseudo.html
+
+apt-get update && apt-get install -y -q openjdk-7-jre-headless hadoop-conf-pseudo
+
+# Step 1: Format the NameNode
+sudo -u hdfs hdfs namenode -format -force
+
+# Step 2: Start HDFS
+for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x start ; done
+
+# Step 3: Create the directories needed for Hadoop processes
+bash /usr/lib/hadoop/libexec/init-hdfs.sh
+
+# Step 6: Create User Directories
+# sudo -u hdfs hdfs dfs -mkdir -p /user/hadoop
+# sudo -u hdfs hdfs dfs -chown hadoop /user/hadoop
diff --git a/continuous_integration/docker-files/cloudera.pref b/continuous_integration/docker-files/cloudera.pref
new file mode 100644
index 0000000..e5a1eae
--- /dev/null
+++ b/continuous_integration/docker-files/cloudera.pref
@@ -0,0 +1,3 @@
+Package: *
+Pin: release o=Cloudera, l=Cloudera
+Pin-Priority: 501
diff --git a/continuous_integration/docker-files/libhdfs-build.sh b/continuous_integration/docker-files/libhdfs-build.sh
new file mode 100644
index 0000000..6de7a8a
--- /dev/null
+++ b/continuous_integration/docker-files/libhdfs-build.sh
@@ -0,0 +1,8 @@
+cd /opt/libhdfs3
+
+mkdir build
+pushd build
+../bootstrap --prefix=/usr/local
+make
+make install
+popd
diff --git a/continuous_integration/docker-files/start.sh b/continuous_integration/docker-files/start.sh
new file mode 100644
index 0000000..5cd1a04
--- /dev/null
+++ b/continuous_integration/docker-files/start.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Start HDFS
+sudo service hadoop-hdfs-namenode start
+sudo service hadoop-hdfs-datanode start
+
+echo "Ready"
+
+# Block
+sleep infinity
diff --git a/distributed/__init__.py b/distributed/__init__.py
new file mode 100644
index 0000000..8edad9c
--- /dev/null
+++ b/distributed/__init__.py
@@ -0,0 +1,21 @@
+from __future__ import print_function, division, absolute_import
+
+from .config import config
+from .core import connect, read, write, rpc
+from .deploy import LocalCluster
+from .diagnostics import progress
+from .client import (Client, Executor, CompatibleExecutor, wait, as_completed,
+ default_client)
+from .nanny import Nanny
+from .scheduler import Scheduler
+from .utils import sync
+from .worker import Worker
+from .worker_client import local_client
+
+try:
+ from .collections import futures_to_collection
+except:
+ pass
+
+
+__version__ = '1.14.3'
diff --git a/distributed/_ipython_utils.py b/distributed/_ipython_utils.py
new file mode 100644
index 0000000..112e472
--- /dev/null
+++ b/distributed/_ipython_utils.py
@@ -0,0 +1,246 @@
+"""Utilities for integrating with IPython
+
+These functions should probably reside in Jupyter and IPython repositories,
+after which we can import them instead of having our own definitions.
+"""
+
+from __future__ import print_function
+
+import atexit
+import os
+try:
+ import queue
+except ImportError:
+ # Python 2
+ import Queue as queue
+from subprocess import Popen
+import sys
+from threading import Thread
+from uuid import uuid4
+
+from tornado.gen import TimeoutError
+from tornado.ioloop import IOLoop
+from threading import Event
+
+from IPython import get_ipython
+from jupyter_client import BlockingKernelClient, write_connection_file
+from jupyter_core.paths import jupyter_runtime_dir
+
+
+OUTPUT_TIMEOUT = 10
+
+
+def run_cell_remote(ip, kc, cell):
+ """Run a cell on a KernelClient
+
+ Any output from the cell will be redisplayed in the local session.
+ """
+ msg_id = kc.execute(cell)
+
+ in_kernel = getattr(ip, 'kernel', False)
+ if in_kernel:
+ socket = ip.display_pub.pub_socket
+ session = ip.display_pub.session
+ parent_header = ip.display_pub.parent_header
+
+ while True:
+ try:
+ msg = kc.get_iopub_msg(timeout=OUTPUT_TIMEOUT)
+ except queue.Empty:
+ raise TimeoutError("Timeout waiting for IPython output")
+
+ if msg['parent_header'].get('msg_id') != msg_id:
+ continue
+ msg_type = msg['header']['msg_type']
+ content = msg['content']
+ if msg_type == 'status':
+ if content['execution_state'] == 'idle':
+ # idle means output is done
+ break
+ elif msg_type == 'stream':
+ stream = getattr(sys, content['name'])
+ stream.write(content['text'])
+ elif msg_type in ('display_data', 'execute_result', 'error'):
+ if in_kernel:
+ session.send(socket, msg_type, content, parent=parent_header)
+ else:
+ if msg_type == 'error':
+ print('\n'.join(content['traceback']), file=sys.stderr)
+ else:
+ sys.stdout.write(content['data'].get('text/plain', ''))
+ else:
+ pass
+
+
+def register_worker_magic(connection_info, magic_name='worker'):
+ """Register a %worker magic, given connection_info.
+
+ Both a line and cell magic are registered,
+ which run the given cell in a remote kernel.
+ """
+ ip = get_ipython()
+ info = dict(connection_info) # copy
+ key = info.pop('key')
+ kc = BlockingKernelClient(**connection_info)
+ kc.session.key = key
+ kc.start_channels()
+ def remote(line, cell=None):
+ """Run the current cell on a remote IPython kernel"""
+ if cell is None:
+ # both line and cell magic
+ cell = line
+ run_cell_remote(ip, kc, cell)
+ remote.client = kc # preserve reference on kc, largely for mocking
+ ip.register_magic_function(remote, magic_kind='line', magic_name=magic_name)
+ ip.register_magic_function(remote, magic_kind='cell', magic_name=magic_name)
+
+
+def remote_magic(line, cell=None):
+ """A magic for running code on a specified remote worker
+
+ The connection_info dict of the worker will be looked up
+ as the first positional arg to the magic.
+ The rest of the line (or the entire cell for a %%cell magic)
+ will be passed to the remote kernel.
+
+ Usage:
+
+ info = e.start_ipython(worker)[worker]
+ %remote info print(worker.data)
+ """
+ # get connection info from IPython's user namespace
+ ip = get_ipython()
+ split_line = line.split(None, 1)
+ info_name = split_line[0]
+ if info_name not in ip.user_ns:
+ raise NameError(info_name)
+ connection_info = dict(ip.user_ns[info_name])
+
+ if not cell: # line magic, use the rest of the line
+ if len(split_line) == 1:
+ raise ValueError("I need some code to run!")
+ cell = split_line[1]
+
+ # turn info dict to hashable str for use as lookup key in _clients cache
+ key = ','.join(map(str, sorted(connection_info.items())))
+ session_key = connection_info.pop('key')
+
+ if key in remote_magic._clients:
+ kc = remote_magic._clients[key]
+ else:
+ kc = BlockingKernelClient(**connection_info)
+ kc.session.key = session_key
+ kc.start_channels()
+ kc.wait_for_ready(timeout=10)
+ remote_magic._clients[key] = kc
+
+ # actually run the code
+ run_cell_remote(ip, kc, cell)
+
+# cache clients for re-use in remote magic
+remote_magic._clients = {}
+
+
+def register_remote_magic(magic_name='remote'):
+ """Define the parameterized %remote magic
+
+ See remote_magic above for details.
+ """
+ ip = get_ipython()
+ if ip is None:
+ return # do nothing if IPython's not running
+ ip.register_magic_function(remote_magic, magic_kind='line', magic_name=magic_name)
+ ip.register_magic_function(remote_magic, magic_kind='cell', magic_name=magic_name)
+
+
+def connect_qtconsole(connection_info, name=None, extra_args=None):
+ """Open a QtConsole connected to a worker who has the given future
+
+ - identify worker with who_has
+ - start IPython kernel on the worker
+ - start qtconsole connected to the kernel
+ """
+ runtime_dir = jupyter_runtime_dir()
+ if name is None:
+ name = uuid4().hex
+
+ path = os.path.join(runtime_dir, name + '.json')
+ write_connection_file(path, **connection_info)
+ cmd = ['jupyter', 'qtconsole', '--existing', path]
+ if extra_args:
+ cmd.extend(extra_args)
+ Popen(cmd)
+ def _cleanup_connection_file():
+ """Cleanup our connection file when we exit."""
+ try:
+ os.remove(path)
+ except OSError:
+ pass
+ atexit.register(_cleanup_connection_file)
+
+
+def start_ipython(ip=None, ns=None, log=None):
+ """Start an IPython kernel in a thread
+
+ Parameters
+ ----------
+
+ ip: str
+ The IP address to listen on (likely the parent object's ip).
+ ns: dict
+ Any names that should be injected into the IPython namespace.
+ log: logger instance
+ Hook up IPython's logging to an existing logger instead of the default.
+ """
+ from IPython import get_ipython
+ if get_ipython() is not None:
+ raise RuntimeError("Cannot start IPython, it's already running.")
+
+ from zmq.eventloop.ioloop import ZMQIOLoop
+ from ipykernel.kernelapp import IPKernelApp
+ # save the global IOLoop instance
+ # since IPython relies on it, but we are going to put it in a thread.
+ save_inst = IOLoop.instance()
+ IOLoop.clear_instance()
+ zmq_loop = ZMQIOLoop()
+ zmq_loop.install()
+
+ # start IPython, disabling its signal handlers that won't work due to running in a thread:
+ app = IPKernelApp.instance(log=log)
+ # Don't connect to the history database
+ app.config.HistoryManager.hist_file = ':memory:'
+ # listen on all interfaces, so remote clients can connect:
+ if ip:
+ app.ip = ip
+ # disable some signal handling, logging
+ noop = lambda : None
+ app.init_signal = noop
+ app.log_connection_info = noop
+
+ # start IPython in a thread
+ # initialization happens in the thread to avoid threading problems
+ # with the sqlite history
+ evt = Event()
+ def _start():
+ app.initialize([])
+ app.kernel.pre_handler_hook = noop
+ app.kernel.post_handler_hook = noop
+ app.kernel.start()
+ app.kernel.loop = IOLoop.instance()
+ # save self in the IPython namespace as 'worker'
+ # inject things into the IPython namespace
+ if ns:
+ app.kernel.shell.user_ns.update(ns)
+ evt.set()
+ zmq_loop.start()
+
+ zmq_loop_thread = Thread(target=_start)
+ zmq_loop_thread.daemon = True
+ zmq_loop_thread.start()
+ assert evt.wait(timeout=5), "IPython didn't start in a reasonable amount of time."
+
+ # put the global IOLoop instance back:
+ IOLoop.clear_instance()
+ save_inst.install()
+ return app
+
diff --git a/distributed/batched.py b/distributed/batched.py
new file mode 100644
index 0000000..2aeabf6
--- /dev/null
+++ b/distributed/batched.py
@@ -0,0 +1,214 @@
+from __future__ import print_function, division, absolute_import
+
+from datetime import timedelta
+import logging
+from timeit import default_timer
+
+from tornado import gen
+from tornado.queues import Queue
+from tornado.iostream import StreamClosedError
+from tornado.ioloop import PeriodicCallback, IOLoop
+
+from .core import read, write
+from .utils import log_errors
+
+
+logger = logging.getLogger(__name__)
+
+
+class BatchedSend(object):
+ """ Batch messages in batches on a stream
+
+ This takes an IOStream and an interval (in ms) and ensures that we send no
+ more than one message every interval milliseconds. We send lists of
+ messages.
+
+ Example
+ -------
+ >>> stream = yield connect(ip, port)
+ >>> bstream = BatchedSend(interval=10) # 10 ms
+ >>> bstream.start(stream)
+ >>> bstream.send('Hello,')
+ >>> bstream.send('world!')
+
+ On the other side, the recipient will get a message like the following::
+
+ ['Hello,', 'world!']
+ """
+ def __init__(self, interval, loop=None):
+ self.loop = loop or IOLoop.current()
+ self.interval = interval / 1000.
+ self.last_transmission = 0
+ self.buffer = []
+ self.stream = None
+ self.last_payload = []
+ self.last_send = gen.sleep(0)
+ self.message_count = 0
+ self.batch_count = 0
+
+ def start(self, stream):
+ self.stream = stream
+ if self.buffer:
+ self.send_next()
+
+ def __str__(self):
+ return '<BatchedSend: %d in buffer>' % len(self.buffer)
+
+ __repr__ = __str__
+
+ @gen.coroutine
+ def send_next(self, wait=True):
+ try:
+ now = default_timer()
+ if wait:
+ wait_time = min(self.last_transmission + self.interval - now,
+ self.interval)
+ yield gen.sleep(wait_time)
+ yield self.last_send
+ self.buffer, payload = [], self.buffer
+ self.last_payload = payload
+ self.last_transmission = now
+ self.batch_count += 1
+ self.last_send = write(self.stream, payload)
+ except Exception as e:
+ logger.exception(e)
+ raise
+
+ @gen.coroutine
+ def _write(self, payload):
+ yield gen.sleep(0)
+ yield write(self.stream, payload)
+
+ def send(self, msg):
+ """ Send a message to the other side
+
+ This completes quickly and synchronously
+ """
+ try:
+ self.message_count += 1
+ if self.stream is None: # not yet started
+ self.buffer.append(msg)
+ return
+
+ if self.stream._closed:
+ raise StreamClosedError()
+
+ if self.buffer:
+ self.buffer.append(msg)
+ return
+
+ # If we're new and early,
+ now = default_timer()
+ if (now < self.last_transmission + self.interval
+ or not self.last_send._done):
+ self.buffer.append(msg)
+ self.loop.add_callback(self.send_next)
+ return
+
+ self.buffer.append(msg)
+ self.loop.add_callback(self.send_next, wait=False)
+ except StreamClosedError:
+ raise
+ except Exception as e:
+ logger.exception(e)
+
+ @gen.coroutine
+ def close(self, ignore_closed=False):
+ """ Flush existing messages and then close stream """
+ try:
+ if self.stream._write_buffer:
+ yield self.last_send
+ if self.buffer:
+ self.buffer, payload = [], self.buffer
+ yield write(self.stream, payload)
+ except StreamClosedError:
+ if not ignore_closed:
+ raise
+ self.stream.close()
+
+
+class BatchedStream(object):
+ """ Mostly obsolete, see BatchedSend """
+ def __init__(self, stream, interval):
+ self.stream = stream
+ self.interval = interval / 1000.
+ self.last_transmission = default_timer()
+ self.send_q = Queue()
+ self.recv_q = Queue()
+ self._background_send_coroutine = self._background_send()
+ self._background_recv_coroutine = self._background_recv()
+ self._broken = None
+
+ self.pc = PeriodicCallback(lambda: None, 100)
+ self.pc.start()
+
+ @gen.coroutine
+ def _background_send(self):
+ with log_errors():
+ while True:
+ msg = yield self.send_q.get()
+ if msg == 'close':
+ break
+ msgs = [msg]
+ now = default_timer()
... 34349 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/dask.distributed.git