[med-svn] [python-cgcloud] 01/02: New upstream version 1.6.0
Steffen Möller
moeller at moszumanska.debian.org
Sun Jan 15 01:06:55 UTC 2017
This is an automated email from the git hooks/post-receive script.
moeller pushed a commit to branch master
in repository python-cgcloud.
commit 31f2de42099498d0f11481843ca6b91246a22ed5
Author: Steffen Moeller <moeller at debian.org>
Date: Sun Jan 15 01:54:40 2017 +0100
New upstream version 1.6.0
---
.gitignore | 8 +
LICENSE | 15 +
MANIFEST.in | 3 +
Makefile | 163 ++
README.md | 79 +
agent/.gitignore | 6 +
agent/main.py | 12 +
agent/setup.cfg | 5 +
agent/setup.py | 31 +
agent/src/cgcloud/__init__.py | 1 +
agent/src/cgcloud/agent/__init__.py | 182 +++
agent/src/cgcloud/agent/cli.py | 217 +++
agent/src/cgcloud/agent/init-script.lsb | 91 ++
agent/src/cgcloud/agent/init-script.systemd | 19 +
agent/src/cgcloud/agent/init-script.upstart | 16 +
core/.gitignore | 6 +
core/README.rst | 393 +++++
core/main.py | 12 +
core/setup.cfg | 5 +
core/setup.py | 32 +
core/src/cgcloud/__init__.py | 1 +
core/src/cgcloud/core/__init__.py | 50 +
core/src/cgcloud/core/agent_box.py | 185 +++
core/src/cgcloud/core/apache.py | 65 +
core/src/cgcloud/core/box.py | 1634 ++++++++++++++++++++
core/src/cgcloud/core/centos_box.py | 133 ++
core/src/cgcloud/core/cli.py | 135 ++
core/src/cgcloud/core/cloud_init_box.py | 254 +++
core/src/cgcloud/core/cluster.py | 147 ++
core/src/cgcloud/core/cluster_commands.py | 410 +++++
core/src/cgcloud/core/commands.py | 885 +++++++++++
core/src/cgcloud/core/common_iam_policies.py | 23 +
core/src/cgcloud/core/deprecated.py | 8 +
core/src/cgcloud/core/docker_box.py | 117 ++
core/src/cgcloud/core/fedora_box.py | 76 +
core/src/cgcloud/core/generic_boxes.py | 303 ++++
core/src/cgcloud/core/init_box.py | 76 +
core/src/cgcloud/core/mesos_box.py | 45 +
core/src/cgcloud/core/package_manager_box.py | 166 ++
core/src/cgcloud/core/project.py | 57 +
core/src/cgcloud/core/rc_local_box.py | 154 ++
core/src/cgcloud/core/source_control_client.py | 32 +
core/src/cgcloud/core/task.py | 23 +
core/src/cgcloud/core/test/__init__.py | 108 ++
core/src/cgcloud/core/test/conftest.py | 14 +
core/src/cgcloud/core/test/test_core.py | 66 +
core/src/cgcloud/core/ubuntu_box.py | 164 ++
core/src/cgcloud/core/version.py | 1 +
core/src/cgcloud/core/yum_box.py | 78 +
core/src/cgcloud/fabric/__init__.py | 0
core/src/cgcloud/fabric/operations.py | 233 +++
core/tests.py | 7 +
jenkins.sh | 20 +
jenkins/.gitignore | 6 +
jenkins/README.rst | 190 +++
jenkins/setup.cfg | 5 +
jenkins/setup.py | 20 +
jenkins/src/cgcloud/__init__.py | 1 +
jenkins/src/cgcloud/jenkins/__init__.py | 21 +
.../src/cgcloud/jenkins/cgcloud_jenkins_slave.py | 81 +
jenkins/src/cgcloud/jenkins/commands.py | 40 +
.../src/cgcloud/jenkins/docker_jenkins_slave.py | 20 +
.../src/cgcloud/jenkins/generic_jenkins_slaves.py | 153 ++
jenkins/src/cgcloud/jenkins/jenkins_master.py | 330 ++++
jenkins/src/cgcloud/jenkins/jenkins_slave.py | 138 ++
.../src/cgcloud/jenkins/rpmbuild_jenkins_slaves.py | 56 +
jenkins/src/cgcloud/jenkins/s3am_jenkins_slave.py | 79 +
jenkins/src/cgcloud/jenkins/test/__init__.py | 1 +
jenkins/src/cgcloud/jenkins/test/conftest.py | 1 +
.../src/cgcloud/jenkins/test/create_all_slaves.py | 338 ++++
jenkins/src/cgcloud/jenkins/toil_jenkins_slave.py | 381 +++++
lib/.gitignore | 6 +
lib/setup.cfg | 6 +
lib/setup.py | 21 +
lib/src/cgcloud/__init__.py | 1 +
lib/src/cgcloud/lib/__init__.py | 5 +
lib/src/cgcloud/lib/context.py | 864 +++++++++++
lib/src/cgcloud/lib/ec2.py | 428 +++++
lib/src/cgcloud/lib/message.py | 54 +
lib/src/cgcloud/lib/test/__init__.py | 47 +
lib/src/cgcloud/lib/util.py | 887 +++++++++++
lib/src/cgcloud_Crypto/Hash/MD5.py | 92 ++
lib/src/cgcloud_Crypto/Hash/__init__.py | 115 ++
lib/src/cgcloud_Crypto/IO/PEM.py | 90 ++
lib/src/cgcloud_Crypto/IO/PKCS8.py | 146 ++
lib/src/cgcloud_Crypto/IO/__init__.py | 32 +
lib/src/cgcloud_Crypto/PublicKey/RSA.py | 503 ++++++
lib/src/cgcloud_Crypto/PublicKey/__init__.py | 41 +
lib/src/cgcloud_Crypto/PublicKey/_slowmath.py | 70 +
lib/src/cgcloud_Crypto/Util/__init__.py | 44 +
lib/src/cgcloud_Crypto/Util/asn1.py | 899 +++++++++++
lib/src/cgcloud_Crypto/Util/number.py | 156 ++
lib/src/cgcloud_Crypto/Util/py3compat.py | 110 ++
lib/src/cgcloud_Crypto/__init__.py | 51 +
lib/src/cgcloud_Crypto/pct_warnings.py | 63 +
mesos-tools/.gitignore | 6 +
mesos-tools/setup.cfg | 5 +
mesos-tools/setup.py | 22 +
mesos-tools/src/cgcloud/__init__.py | 1 +
mesos-tools/src/cgcloud/mesos_tools/__init__.py | 396 +++++
mesos/.gitignore | 6 +
mesos/LICENSE | 202 +++
mesos/README.rst | 74 +
mesos/setup.cfg | 5 +
mesos/setup.py | 21 +
mesos/src/cgcloud/__init__.py | 1 +
mesos/src/cgcloud/mesos/__init__.py | 8 +
mesos/src/cgcloud/mesos/mesos_box.py | 309 ++++
mesos/src/cgcloud/mesos/mesos_cluster.py | 12 +
mesos/src/cgcloud/mesos/test/__init__.py | 27 +
mesos/src/cgcloud/mesos/test/conftest.py | 1 +
mesos/src/cgcloud/mesos/test/test_mesos.py | 67 +
run_tests.py | 93 ++
spark-tools/.gitignore | 6 +
spark-tools/setup.cfg | 5 +
spark-tools/setup.py | 22 +
spark-tools/src/cgcloud/__init__.py | 1 +
spark-tools/src/cgcloud/spark_tools/__init__.py | 482 ++++++
spark/.gitignore | 6 +
spark/README.rst | 108 ++
spark/setup.cfg | 5 +
spark/setup.py | 21 +
spark/src/cgcloud/__init__.py | 1 +
spark/src/cgcloud/spark/__init__.py | 8 +
spark/src/cgcloud/spark/spark_box.py | 445 ++++++
spark/src/cgcloud/spark/spark_cluster.py | 12 +
spark/src/cgcloud/spark/test/__init__.py | 0
spark/src/cgcloud/spark/test/conftest.py | 1 +
spark/src/cgcloud/spark/test/test_spark.py | 133 ++
toil/.gitignore | 6 +
toil/README.rst | 77 +
toil/setup.py | 20 +
toil/src/cgcloud/__init__.py | 1 +
toil/src/cgcloud/toil/__init__.py | 12 +
toil/src/cgcloud/toil/test/__init__.py | 1 +
toil/src/cgcloud/toil/test/conftest.py | 1 +
toil/src/cgcloud/toil/test/test_toil.py | 169 ++
toil/src/cgcloud/toil/toil_box.py | 196 +++
toil/src/cgcloud/toil/toil_cluster.py | 12 +
version.py | 21 +
140 files changed, 16586 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3742912
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+/.idea
+*.pyc
+*.egg
+.eggs/
+nosetests.xml
+/venv/
+.cache
+__pycache__
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..3f330b9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,15 @@
+Copyright (C) 2011-15 by UCSC Computational Genomics Lab
+
+Contributors: Hannes Schmidt, Christopher Ketchum
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..68ebe16
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include version.py
+# http://bugs.python.org/issue12885 (I think)
+include ./version.py
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..53d1e89
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,163 @@
+# Copyright (C) 2015 UCSC Computational Genomics Lab
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+define help
+
+Supported targets: 'develop', 'sdist', 'clean', 'test' and 'pypi'
+
+The 'develop' target creates an editable install (aka develop mode).
+
+The 'sdist' target creates source distributions for each of the subprojects.
+
+The 'clean' target undoes the effect of 'sdist' and 'develop'.
+
+The 'test' target runs the unit tests.
+
+The 'pypi' target publishes the current commit of the project to PyPI after
+asserting that it is being invoked on a continuous integration server and that
+the working copy and the index are clean.
+
+endef
+export help
+.PHONY: help
+help:
+ @echo "$$help"
+
+
+python=python2.7
+tests=src
+
+develop_projects=lib core jenkins spark mesos toil
+sdist_projects=lib agent spark-tools mesos-tools
+all_projects=lib core agent jenkins spark spark-tools mesos mesos-tools toil
+
+green=\033[0;32m
+normal=\033[0m
+red=\033[0;31m
+
+
+.SUFFIXES:
+
+
+define _develop
+.PHONY: develop_$1
+develop_$1: _check_venv $1/version.py $1/MANIFEST.in
+ cd $1 && $(python) setup.py egg_info develop
+endef
+$(foreach project,$(develop_projects),$(eval $(call _develop,$(project))))
+.PHONY: develop
+develop: $(foreach project,$(develop_projects),develop_$(project))
+
+# Mirrors the intra-project dependencies declared in each setup.py
+
+develop_agent: develop_lib
+develop_core: develop_lib
+develop_jenkins: develop_lib develop_core
+develop_mesos: develop_lib develop_core
+develop_spark: develop_lib develop_core
+develop_toil: develop_lib develop_core develop_mesos
+
+define _sdist
+.PHONY: sdist_$1
+sdist_$1: _check_venv $1/version.py $1/MANIFEST.in
+ cd $1 && $(python) setup.py sdist
+endef
+$(foreach project,$(sdist_projects),$(eval $(call _sdist,$(project))))
+.PHONY: sdist
+sdist: $(foreach project,$(sdist_projects),sdist_$(project))
+
+
+define _pypi
+.PHONY: pypi_$1
+pypi_$1: _check_venv _check_running_on_jenkins _check_clean_working_copy $1/version.py $1/MANIFEST.in
+ test "$$$$ghprbActualCommit" \
+ && echo "We're building a PR, skipping PyPI." || ( \
+ cd $1 && $(python) setup.py egg_info sdist bdist_egg upload )
+endef
+$(foreach project,$(all_projects),$(eval $(call _pypi,$(project))))
+.PHONY: pypi
+pypi: $(foreach project,$(all_projects),pypi_$(project))
+
+
+define _clean
+.PHONY: clean_$1
+# clean depends on version.py since it invokes setup.py
+clean_$1: _check_venv $1/version.py
+ cd $1 && $(python) setup.py clean --all && rm -rf dist src/*.egg-info MANIFEST.in version.py version.pyc
+endef
+$(foreach project,$(all_projects),$(eval $(call _clean,$(project))))
+.PHONY: clean
+clean: $(foreach project,$(all_projects),clean_$(project))
+
+
+define _undevelop
+.PHONY: undevelop_$1
+# develop depends on version.py since it invokes setup.py
+undevelop_$1: _check_venv $1/version.py
+ cd $1 && $(python) setup.py develop -u
+endef
+$(foreach project,$(all_projects),$(eval $(call _undevelop,$(project))))
+.PHONY: undevelop
+undevelop: $(foreach project,$(develop_projects),undevelop_$(project))
+
+
+define _test
+.PHONY: test_$1
+test_$1: _check_venv _check_pytest
+ cd $1 && $(python) ../run_tests.py "$$(tests)"
+ @echo "$(green)Tests succeeded.$(normal)"
+endef
+$(foreach project,$(develop_projects),$(eval $(call _test,$(project))))
+.PHONY: test
+test: $(foreach project,$(develop_projects),test_$(project))
+
+
+.PHONY: _check_venv
+_check_venv:
+ @$(python) -c 'import sys; sys.exit( int( not hasattr(sys, "real_prefix") ) )' \
+ || ( echo "$(red)A virtualenv must be active.$(normal)" ; false )
+
+
+.PHONY: _check_pytest
+_check_pytest: _check_venv
+ $(python) -c 'import pytest' \
+ || ( echo "$(red)The 'pytest' distribution must be installed.$(normal)" ; false )
+
+
+.PHONY: _check_clean_working_copy
+_check_clean_working_copy:
+ @echo "$(green)Checking if your working copy is clean ...$(normal)"
+ @git diff --exit-code > /dev/null \
+ || ( echo "$(red)Your working copy looks dirty.$(normal)" ; false )
+ @git diff --cached --exit-code > /dev/null \
+ || ( echo "$(red)Your index looks dirty.$(normal)" ; false )
+ @test -z "$$(git ls-files --other --exclude-standard --directory)" \
+ || ( echo "$(red)You have are untracked files:$(normal)" \
+ ; git ls-files --other --exclude-standard --directory \
+ ; false )
+
+
+.PHONY: _check_running_on_jenkins
+_check_running_on_jenkins:
+ @echo "$(green)Checking if running on Jenkins ...$(normal)"
+ test -n "$$BUILD_NUMBER" \
+ || ( echo "$(red)This target should only be invoked on Jenkins.$(normal)" ; false )
+
+
+%/version.py: version.py
+ $(python) $< > $@
+
+
+%/MANIFEST.in: MANIFEST.in
+ cp $< $@
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8ddcdbf
--- /dev/null
+++ b/README.md
@@ -0,0 +1,79 @@
+CGCloud lets you automate the creation, management and provisioning of VMs and
+clusters of VMs in Amazon EC2. While allowing for easy programmatic
+customization of VMs in development, it also provides rock-solid
+reproducibility in production.
+
+Features
+========
+
+ * Works with base images of all actively supported releases of Ubuntu and
+ Fedora, and some releases of CentOS
+
+ * Lets you share VMs between multiple users, keeping the set of authorized SSH
+ keys synchronized on all VMs in real-time as users/keypairs are added or
+ removed from AWS.
+
+ * Offers isolation between users, teams and deployments via namespaces
+
+ * Lets you stand up a distributed, continuous integration infrastructure using
+ one long-running Jenkins master and multiple on-demand Jenkins slaves
+
+ * Lets you create an HDFS-backed Apache Spark cluster of any number of nodes
+ in just three minutes, independently of the number of nodes, with or
+ without attached EBS volumes
+
+ * Lets you create a Mesos cluster of any number of nodes
+
+ * Supports running Spark, Mesos and Toil workers on the spot market
+
+ * Is easily extensible via a simple plugin architecture
+
+ * VMs created by CGCloud optionally report memory and disk utilization as
+ custom CloudWatch metrics
+
+So what does it not offer? What are its limitations? First and foremost, it is
+strictly tied to AWS and EC2. Other cloud providers are not supported and
+probably will not be in the near future. It does not have a GUI. It is written
+in Python and if you want to customize it, you will need to know Python. It
+makes extreme use of inheritance, multiple inheritance, actually. Some people
+frown at that since it will make it likely that your own customizations break
+between releases of CGCloud. While allowing CGCloud to be extremely
+[DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself), multiple
+inheritance also increases the complexity and steepens the learning curve.
+
+Where to go from here?
+======================
+
+If you are a (potential) **user** of CGCloud, head on over to the [CGCloud Core
+README](core/README.rst) and then move on to
+
+ * [CGCloud Jenkins](jenkins/README.rst)
+
+ * [CGCloud Spark](spark/README.rst)
+
+ * [CGCloud Mesos](mesos/README.rst)
+
+ * [CGCloud Toil](toil/README.rst)
+
+If you are a **developer**, make sure you have pip and virtualenv, clone this
+repository and perform the following steps from the project root::
+
+ virtualenv venv
+ source venv/bin/activate
+ make develop sdist
+
+That will set up the project in development mode inside a virtualenv and create
+source distributions (aka sdists) for those components that are to be installed on
+remote boxes. In development mode, these components are not installed from PyPI
+but are instead directly uploaded to the box in sdist form and then installed
+from the sdist.
+
+After pulling changes from the remote, you need to run `make develop sdist` again.
+This step is easy to forget because you often get by without it.
+
+Specifically, `make develop` is necessary after any of the setup.py or
+version.py files have changed. And `make sdist` is necessary after changes to
+the agent, spark-tools or mesos-tools subprojects. Otherwise, `cgcloud create`
+will install a stale version of these on the remote box.
+
+To run the unittests, `pip install pytest` and then do `make test`.
diff --git a/agent/.gitignore b/agent/.gitignore
new file mode 100644
index 0000000..0ff20ef
--- /dev/null
+++ b/agent/.gitignore
@@ -0,0 +1,6 @@
+/build
+/dist
+*.egg-info
+*.pyc
+/MANIFEST.in
+/version.py
diff --git a/agent/main.py b/agent/main.py
new file mode 100644
index 0000000..d142456
--- /dev/null
+++ b/agent/main.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python2.7
+
+import os
+
+import sys
+
+sys.path.append( os.path.join( os.path.dirname( __file__ ), 'src', 'main' ) )
+
+from cgcloud.agent.cli import main
+
+if __name__ == "__main__":
+ main( )
diff --git a/agent/setup.cfg b/agent/setup.cfg
new file mode 100644
index 0000000..082e511
--- /dev/null
+++ b/agent/setup.cfg
@@ -0,0 +1,5 @@
+[pytest]
+# Look for any python file, the default of test_*.py wouldn't work for us
+python_files=*.py
+# Also run doctests
+addopts = --doctest-modules
diff --git a/agent/setup.py b/agent/setup.py
new file mode 100644
index 0000000..fadf895
--- /dev/null
+++ b/agent/setup.py
@@ -0,0 +1,31 @@
+from __future__ import absolute_import
+
+import sys
+
+from setuptools import setup, find_packages
+
+from version import cgcloud_version, bd2k_python_lib_dep, boto_dep
+
+setup(
+ name='cgcloud-agent',
+ version=cgcloud_version,
+
+ author='Hannes Schmidt',
+ author_email='hannes at ucsc.edu',
+ url='https://github.com/BD2KGenomics/cgcloud',
+ description='Management of ~/.ssh/authorized_keys for a fleet of EC2 instances',
+
+ package_dir={ '': 'src' },
+ packages=find_packages( 'src' ),
+ namespace_packages=[ 'cgcloud' ],
+ package_data={
+ 'cgcloud.agent': [ 'init-script.*' ] },
+ entry_points={
+ 'console_scripts': [
+ 'cgcloudagent = cgcloud.agent.cli:main' ], },
+ install_requires=filter( None, [
+ bd2k_python_lib_dep,
+ 'cgcloud-lib==' + cgcloud_version,
+ boto_dep,
+ 'python-daemon==2.0.6',
+ 'argparse==1.4.0' if sys.version_info < (2, 7) else None ] ) )
diff --git a/agent/src/cgcloud/__init__.py b/agent/src/cgcloud/__init__.py
new file mode 100644
index 0000000..1148131
--- /dev/null
+++ b/agent/src/cgcloud/__init__.py
@@ -0,0 +1 @@
+__import__( 'pkg_resources' ).declare_namespace( __name__ )
\ No newline at end of file
diff --git a/agent/src/cgcloud/agent/__init__.py b/agent/src/cgcloud/agent/__init__.py
new file mode 100644
index 0000000..f6643e8
--- /dev/null
+++ b/agent/src/cgcloud/agent/__init__.py
@@ -0,0 +1,182 @@
+from contextlib import contextmanager
+import logging
+import errno
+import os
+import tempfile
+import pwd
+import threading
+
+from boto.sqs.message import RawMessage
+from bd2k.util.throttle import LocalThrottle
+import time
+
+from cgcloud.lib.context import Context
+from cgcloud.lib.message import Message, UnknownVersion
+from cgcloud.lib.util import UserError
+
+log = logging.getLogger( __name__ )
+
+
+class Agent( object ):
+ """
+ The agent is a daemon process running on every EC2 instance of AgentBox.
+ """
+
+ def __init__( self, ctx, options ):
+ """
+ :type ctx: Context
+ """
+ super( Agent, self ).__init__( )
+ self.ctx = ctx
+ self.options = options
+ self.fingerprints = None
+
+ queue_name = self.ctx.to_aws_name( self.ctx.agent_queue_name )
+ self.queue = self.ctx.sqs.get_queue( queue_name )
+ if self.queue is None:
+ # The create_queue API call handles races gracefully,
+ # the conditional above is just an optimization.
+ self.queue = self.ctx.sqs.create_queue( queue_name )
+ self.queue.set_message_class( RawMessage )
+ self.ctx.sns.subscribe_sqs_queue( ctx.agent_topic_arn, self.queue )
+
+ def run( self ):
+ throttle = LocalThrottle( min_interval=self.options.interval )
+ # First call always returns immediately
+ throttle.throttle( )
+ # Always update keys initially
+ self.update_ssh_keys( )
+ self.start_metric_thread( )
+ while True:
+ # Do 'long' (20s) polling for messages
+ messages = self.queue.get_messages( num_messages=10, # the maximum permitted
+ wait_time_seconds=20, # ditto
+ visibility_timeout=10 )
+ if messages:
+ # Process messages, combining multiple messages of the same type
+ update_ssh_keys = False
+ for sqs_message in messages:
+ try:
+ message = Message.from_sqs( sqs_message )
+ except UnknownVersion as e:
+ log.warning( 'Ignoring message with unknown version %s', e.version )
+ else:
+ if message.type == Message.TYPE_UPDATE_SSH_KEYS:
+ update_ssh_keys = True
+ if update_ssh_keys:
+ self.update_ssh_keys( )
+ # Greedily consume all accrued messages
+ self.queue.delete_message_batch( messages )
+ else:
+ # Without messages, update if throttle interval has passed
+ if throttle.throttle( wait=False ):
+ self.update_ssh_keys( )
+
+ def make_dir( self, path, mode, uid, gid ):
+ try:
+ os.mkdir( path, mode )
+ except OSError as e:
+ if e.errno == errno.EEXIST:
+ pass
+ else:
+ raise
+ else:
+ os.chown( path, uid, gid )
+
+ @contextmanager
+ def make_file( self, path, mode, uid, gid ):
+ """
+ Atomically create a file at the given path. To be used as a context manager that yields
+ a file handle for writing to.
+ """
+ dir_path, file_name = os.path.split( path )
+ with tempfile.NamedTemporaryFile( prefix=file_name + '.',
+ dir=dir_path,
+ delete=False ) as temp_file:
+ yield temp_file
+ os.chmod( temp_file.name, mode )
+ os.chown( temp_file.name, uid, gid )
+ os.rename( temp_file.name, path )
+
+ def update_ssh_keys( self ):
+ keypairs = self.ctx.expand_keypair_globs( self.options.ec2_keypair_names )
+ fingerprints = set( keypair.fingerprint for keypair in keypairs )
+ if fingerprints != self.fingerprints:
+ ssh_keys = set( self.download_ssh_key( keypair ) for keypair in keypairs )
+ if None in ssh_keys: ssh_keys.remove( None )
+
+ for account in self.options.accounts:
+ pw = pwd.getpwnam( account )
+ dot_ssh_path = os.path.join( pw.pw_dir, '.ssh' )
+ self.make_dir( dot_ssh_path, 00755, pw.pw_uid, pw.pw_gid )
+ authorized_keys_path = os.path.join( dot_ssh_path, 'authorized_keys' )
+ try:
+ with open( authorized_keys_path ) as f:
+ local_ssh_keys = set(
+ l.strip( ) for l in f.readlines( ) if not l.isspace( ) )
+ except IOError as e:
+ if e.errno == errno.ENOENT:
+ local_ssh_keys = None
+ else:
+ raise
+ if local_ssh_keys != ssh_keys:
+ with self.make_file( authorized_keys_path, 00644, pw.pw_uid,
+ pw.pw_gid ) as authorized_keys:
+ authorized_keys.writelines( ssh_key + '\n' for ssh_key in ssh_keys )
+ self.fingerprints = fingerprints
+
+ def download_ssh_key( self, keypair ):
+ try:
+ return self.ctx.download_ssh_pubkey( keypair ).strip( )
+ except UserError:
+ log.warn( 'Exception while downloading SSH public key from S3.', exc_info=True )
+ return None
+
+ def start_metric_thread( self ):
+ try:
+ import psutil
+ except ImportError:
+ pass
+ else:
+ t = threading.Thread( target=self.metric_thread )
+ t.daemon = True
+ t.start( )
+
+ def metric_thread( self ):
+ """
+ Collects memory and disk usage as percentages via psutil and adds them as CloudWatch
+ metrics. Any "3" type instance assumes ephemeral (/mnt/ephemeral) is primary storage.
+ Metrics are updated every 5 minutes under the custom 'CGCloud' namespace.
+
+ Resource Metric Name
+ -------- -----------
+ Memory MemUsage
+ Disk DiskUsage_root or DiskUsage_<mount_point>
+ """
+ import psutil
+ from boto.ec2 import cloudwatch
+ from boto.utils import get_instance_metadata
+ metadata = get_instance_metadata( )
+ instance_id = metadata[ 'instance-id' ]
+ region = metadata[ 'placement' ][ 'availability-zone' ][ 0:-1 ]
+ while True:
+ # Collect memory metrics
+ memory_percent = psutil.virtual_memory( ).percent
+ metrics = { 'MemUsage': memory_percent }
+ # Collect disk metrics
+ for partition in psutil.disk_partitions( ):
+ mountpoint = partition.mountpoint
+ if mountpoint == '/':
+ metrics[ 'DiskUsage_root' ] = psutil.disk_usage( mountpoint ).percent
+ else:
+ metrics[ 'DiskUsage' + mountpoint.replace( '/', '_' ) ] = psutil.disk_usage(
+ mountpoint ).percent
+ # Send metrics
+ cw = cloudwatch.connect_to_region( region )
+ try:
+ cw.put_metric_data( 'CGCloud', metrics.keys( ), metrics.values( ),
+ unit='Percent', dimensions={ "InstanceId": instance_id } )
+ finally:
+ cw.close( )
+ cw = None
+ time.sleep( 300 )
diff --git a/agent/src/cgcloud/agent/cli.py b/agent/src/cgcloud/agent/cli.py
new file mode 100755
index 0000000..dc7d2ca
--- /dev/null
+++ b/agent/src/cgcloud/agent/cli.py
@@ -0,0 +1,217 @@
+import os
+import sys
+import argparse
+import platform
+import itertools
+import logging
+from logging.handlers import SysLogHandler, SYSLOG_UDP_PORT
+
+import daemon
+from bd2k.util.logging import Utf8SyslogFormatter
+from bd2k.util import uid_to_name, gid_to_name, name_to_uid, name_to_gid, shell
+from bd2k.util.lockfile import SmartPIDLockFile
+from bd2k.util.throttle import LocalThrottle
+
+from cgcloud.lib.context import Context
+from cgcloud.agent import Agent
+
+log = logging.getLogger( )
+
+description = "The CGHub Cloud Agent daemon"
+
+exec_path = os.path.abspath( sys.argv[ 0 ] )
+exec_name = os.path.basename( exec_path )
+
+
+def main( ):
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ description=description )
+ group = parser.add_argument_group( title='functional options' )
+ group.add_argument( '--namespace', '-n', metavar='PREFIX',
+ required=True,
+ help='Optional prefix for naming EC2 resources like instances, images, '
+ 'volumes, etc. Use this option to create a separate namespace in '
+ 'order to avoid collisions, e.g. when running tests. The value of '
+ 'the environment variable CGCLOUD_NAMESPACE, if that variable is '
+ 'present, overrides the default. The string __me__ anywhere in the '
+ 'namespace will be replaced by the name of the IAM user whose '
+ 'credentials are used to issue requests to AWS.' )
+ default_zone = os.environ.get( 'CGCLOUD_ZONE', None )
+ group.add_argument( '--zone', '-z', metavar='AVAILABILITY_ZONE',
+ default=default_zone,
+ required=not default_zone,
+ dest='availability_zone',
+ help='The name of the EC2 availability zone to operate in, '
+ 'e.g. us-east-1a, us-west-1b or us-west-2c etc. This argument '
+ 'implies the AWS region to run in. The value of the environment '
+ 'variable CGCLOUD_ZONE, if that variable is present, overrides the '
+ 'default.' )
+ group.add_argument( '--interval', '-i', metavar='SECONDS',
+ default=300, type=int,
+ help='' )
+ group.add_argument( '--accounts', metavar='PATH', nargs='+',
+ default=[ uid_to_name( os.getuid( ) ) ],
+ help="The names of user accounts whose .ssh/authorized_keys file should "
+ "be managed by this agent. Note that managing another user's "
+ ".ssh/authorized_keys typically requires running the agent as root." )
+ default_ec2_keypair_names = os.environ.get( 'CGCLOUD_KEYPAIRS', '' ).split( )
+ group.add_argument( '--keypairs', '-k', metavar='EC2_KEYPAIR_NAME',
+ dest='ec2_keypair_names', nargs='+',
+ required=not default_ec2_keypair_names,
+ default=default_ec2_keypair_names,
+ help='The names or name patterns of EC2 key pairs whose public key is to '
+ 'be maintained in the ~/.ssh/authorized_keys files of each '
+ 'account listed in the --accounts option. Each argument may be a '
+ 'literal name of a key pair or a shell-style glob in which case '
+ 'every key pair whose name matches that glob will be deployed '
+ 'to the box. The value of the environment variable CGCLOUD_KEYPAIRS, '
+ 'if that variable is present, overrides the default.' )
+
+ group = parser.add_argument_group( title='process options' )
+ group.add_argument( '--debug', '-X', default=False, action='store_true',
+ help="Run in debug mode without daemonizing. All other process options "
+ "will be ignored." )
+ group.add_argument( '--user', '-u', metavar='UID',
+ default=uid_to_name( os.getuid( ) ),
+ help='The name of the user to run the daemon as.' )
+ group.add_argument( '--group', '-g', metavar='GID',
+ default=gid_to_name( os.getgid( ) ),
+ help='The name of the group to run the daemon as.' )
+ group.add_argument( '--pid-file', '-p', metavar='PATH',
+ default='./%s.pid' % exec_name,
+ help="The path of the file to which the daemon's process ID will be "
+ "written." )
+ log_levels = [ logging.getLevelName( level ) for level in
+ ( logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG ) ]
+ group.add_argument( '--log-level', default=logging.getLevelName( logging.INFO ),
+ choices=log_levels, help="The default log level." )
+ group.add_argument( '--log-spill', metavar='PATH',
+ default='./%s.log' % exec_name,
+ help="The path of the file to which the daemon's stderr and stdout will "
+ "be redirected. Most of the diagnostic output will go to syslog but "
+ "some might spill over to stderr or stdout, especially on errors "
+ "during daemonization." )
+
+ group = parser.add_argument_group( title='miscellaneous options' )
+ group.add_argument( '--init-script', default=False, action='store_true',
+ help='Instead of starting the daemon, generate an /etc/init.d script for '
+ '%s using the specified options and exit. One would typically '
+ 'redirect the output to a file, move that file into place, '
+ 'make it executable and run chkconfig to '
+ 'update the run levels.' % exec_name )
+
+ group.add_argument( '--init', metavar='NAME', default=None, required=False,
+ choices=[ 'sysv', 'upstart', 'systemd' ],
+ help="The init system invoking this program. This parameter is only "
+ "needed when this program is run as a service under the auspices of "
+ "a init daemon." )
+
+ options = parser.parse_args( )
+
+ # The lock file path will be evaluated by DaemonContext after the chdir to /,
+ # so we need to convert a relative path to an absolute one. Also, the init script generation
+ # should not use relative paths.
+ options.pid_file = os.path.abspath( options.pid_file )
+ options.log_spill = os.path.abspath( options.log_spill )
+
+ if options.init_script:
+ generate_init_script( options )
+ sys.exit( 0 )
+
+ def run( ):
+ log.info( "Entering main loop." )
+ ctx = Context( availability_zone=options.availability_zone, namespace=options.namespace )
+ throttle = LocalThrottle( min_interval=options.interval )
+ for i in itertools.count( ):
+ throttle.throttle( )
+ try:
+ log.info( "Starting run %i.", i )
+ Agent( ctx, options ).run( )
+ log.info( "Completed run %i.", i )
+ except (SystemExit, KeyboardInterrupt):
+ log.info( 'Terminating.' )
+ break
+ except:
+ log.exception( 'Abandoning run due to exception' )
+
+ formatter = Utf8SyslogFormatter(
+ '%s[%%(process)d]: [%%(levelname)s] %%(threadName)s %%(name)s: %%(message)s' % exec_name )
+ if options.debug:
+ handler = logging.StreamHandler( sys.stderr )
+ handler.setFormatter( formatter )
+ log.addHandler( handler )
+ log.setLevel( logging.DEBUG )
+ run( )
+ else:
+ system = platform.system( )
+ if system in ( 'Darwin', 'FreeBSD' ):
+ address = '/var/run/syslog'
+ elif system == 'Linux':
+ address = '/dev/log'
+ else:
+ address = ( 'localhost', SYSLOG_UDP_PORT )
+ handler = SysLogHandler( address=address )
+ handler.setFormatter( formatter )
+ log.addHandler( handler )
+ # getLevelName works in the reverse, too:
+ log.setLevel( logging.getLevelName( options.log_level ) )
+ log_spill = open( options.log_spill, 'w' ) if options.log_spill else None
+ try:
+ pid_lock_file = SmartPIDLockFile( options.pid_file )
+ with daemon.DaemonContext( uid=name_to_uid( options.user ),
+ gid=name_to_gid( options.group ),
+ stderr=log_spill, stdout=log_spill,
+ files_preserve=[ handler.socket ],
+ # True needed for systemd (see [1])
+ detach_process=True if options.init == 'systemd' else None,
+ pidfile=pid_lock_file ):
+ run( )
+ finally:
+ if log_spill:
+ log_spill.close( )
+
+
+# [1]: http://echorand.me/2013/08/02/notes-on-writing-systemd-unit-files-for-beakers-daemon-processes/
+
+
+def generate_init_script( options ):
+ from pkg_resources import resource_string
+ import cgcloud.agent
+ import platform
+
+ distro, version, codename = map( str.lower, platform.linux_distribution( ) )
+
+ console = None
+ if distro == 'ubuntu':
+ quote_level = 1
+ if codename < 'vivid':
+ script = 'init-script.upstart'
+ # Lucid's version of upstart doesn't support "console log", Precise's does, don't know
+ # about the versions in between
+ console = 'output' if codename < 'precise' else 'log'
+ else:
+ script = 'init-script.systemd'
+ else:
+ script = 'init-script.lsb'
+ quote_level = 2
+
+ init_script = resource_string( cgcloud.agent.__name__, script )
+
+ args = [ '--namespace', options.namespace,
+ '--zone', options.availability_zone,
+ '--interval', str( options.interval ),
+ '--accounts' ] + options.accounts + [
+ '--keypairs' ] + options.ec2_keypair_names + [
+ '--user', options.user,
+ '--group', options.group,
+ '--pid-file', options.pid_file,
+ '--log-level', options.log_level,
+ '--log-spill', options.log_spill ]
+ variables = vars( options ).copy( )
+ variables.update( dict( args=' '.join( shell.quote( arg, level=quote_level ) for arg in args ),
+ exec_path=exec_path,
+ exec_name=exec_name,
+ console=console,
+ description=description ) )
+ print init_script % variables
diff --git a/agent/src/cgcloud/agent/init-script.lsb b/agent/src/cgcloud/agent/init-script.lsb
new file mode 100644
index 0000000..5b97c67
--- /dev/null
+++ b/agent/src/cgcloud/agent/init-script.lsb
@@ -0,0 +1,91 @@
+#!/bin/sh
+#
+# chkconfig: 35 99 1
+# description: %(description)s
+# processname: %(exec_name)s
+# pid_file: %(pid_file)s
+#
+
+### BEGIN INIT INFO
+# Provides: %(exec_name)s
+# Required-Start: $network
+# Required-Stop: 3 5
+# Default-Start: 3 5
+# Default-Stop:
+# Short-Description: %(exec_name)s
+# Description: %(description)s
+### END INIT INFO
+
+exec_path=%(exec_path)s
+exec_name=%(exec_name)s
+pid_file=%(pid_file)s
+log_spill=%(log_spill)s
+user=%(user)s
+group=%(group)s
+
+if [ -f /etc/rc.d/init.d/functions ]; then
+ . /etc/rc.d/init.d/functions
+fi
+
+RETVAL=0
+
+start() {
+ echo -n "Starting $exec_name: "
+ mkdir -p ${pid_file%%/*} ${log_spill%%/*}
+ chown $user:$group ${pid_file%%/*}
+ chmod 755 ${pid_file%%/*} ${log_spill%%/*}
+ daemon $exec_path --init sysv %(args)s
+ RETVAL=$?
+ echo
+ [ $RETVAL -eq 0 ] && touch /var/lock/subsys/$exec_name
+ return $RETVAL
+}
+
+stop() {
+ echo -n "Stopping $exec_name: "
+ killproc -p $pid_file $exec_path
+ RETVAL=$?
+ echo
+ if [ $RETVAL -eq 0 ]; then
+ rm -f /var/lock/subsys/$exec_name
+ fi
+}
+
+restart() {
+ stop
+ start
+}
+
+# See how we were called.
+case "$1" in
+ start)
+ [ -f /var/lock/subsys/$exec_name ] && exit 0
+ $1
+ ;;
+ stop)
+ [ -f /var/lock/subsys/$exec_name ] || exit 0
+ $1
+ ;;
+ restart)
+ $1
+ ;;
+ status)
+ status -p $pid_file $exec_path
+ RETVAL=$?
+ ;;
+ condrestart|try-restart)
+ [ -f /var/lock/subsys/$exec_name ] && restart || :
+ ;;
+ reload)
+ echo "can't reload configuration, you have to restart it"
+ RETVAL=3
+ ;;
+ force-reload)
+ restart
+ ;;
+ *)
+ echo "Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}"
+ exit 1
+ ;;
+esac
+exit $RETVAL
diff --git a/agent/src/cgcloud/agent/init-script.systemd b/agent/src/cgcloud/agent/init-script.systemd
new file mode 100644
index 0000000..72a2d16
--- /dev/null
+++ b/agent/src/cgcloud/agent/init-script.systemd
@@ -0,0 +1,19 @@
+[Unit]
+Description=%(exec_name)s
+Documentation=https://github.com/BD2KGenomics/cgcloud-agent
+After=network.target
+
+[Service]
+Type=forking
+PIDFile=%(pid_file)s
+ExecStart=%(exec_path)s --init systemd %(args)s
+User=%(user)s
+Group=%(group)s
+ExecStartPre=\
+ pid_file="%(pid_file)s" ; \
+ log_spill="%(log_spill)s" ; \
+ user=%(user)s ; \
+ group=%(group)s ; \
+ mkdir -p "${pid_file%%/*}" "${log_spill%%/*}" ; \
+ chown $user:$group "${pid_file%%/*}" ; \
+ chmod 755 ${pid_file%%/*} ${log_spill%%/*}
diff --git a/agent/src/cgcloud/agent/init-script.upstart b/agent/src/cgcloud/agent/init-script.upstart
new file mode 100644
index 0000000..955b7bc
--- /dev/null
+++ b/agent/src/cgcloud/agent/init-script.upstart
@@ -0,0 +1,16 @@
+description "%(exec_name)s"
+author "Hannes Schmidt <hannes at ucsc.edu>"
+console %(console)s
+start on runlevel [2345]
+stop on runlevel [016]
+respawn
+exec %(exec_path)s --init upstart %(args)s
+pre-start script
+ pid_file=%(pid_file)s
+ log_spill=%(log_spill)s
+ user=%(user)s
+ group=%(group)s
+ mkdir -p ${pid_file%%/*} ${log_spill%%/*}
+ chown $user:$group ${pid_file%%/*}
+ chmod 755 ${pid_file%%/*} ${log_spill%%/*}
+end script
diff --git a/core/.gitignore b/core/.gitignore
new file mode 100644
index 0000000..0ff20ef
--- /dev/null
+++ b/core/.gitignore
@@ -0,0 +1,6 @@
+/build
+/dist
+*.egg-info
+*.pyc
+/MANIFEST.in
+/version.py
diff --git a/core/README.rst b/core/README.rst
new file mode 100644
index 0000000..b1b05ba
--- /dev/null
+++ b/core/README.rst
@@ -0,0 +1,393 @@
+Prerequisites
+=============
+
+To install and use CGCloud, you need
+
+* Python ≧ 2.7.x
+
+* pip_ and virtualenv_
+
+* Git_
+
+* Mac OS X: Xcode_ and the `Xcode Command Line Tools`_ (needed during the
+ installation of cgcloud-core for compiling the PyCrypto dependency)
+
+.. _pip: https://pip.readthedocs.org/en/latest/installing.html
+.. _virtualenv: https://virtualenv.pypa.io/en/latest/installation.html
+.. _Git: http://git-scm.com/
+.. _Xcode: https://itunes.apple.com/us/app/xcode/id497799835?mt=12
+.. _Xcode Command Line Tools: http://stackoverflow.com/questions/9329243/xcode-4-4-command-line-tools
+
+Installation
+============
+
+Read the entire section before pasting any commands and ensure that all
+prerequisites are installed. It is recommended to install CGCloud into a
+virtualenv. Create a virtualenv and use ``pip`` to install
+the ``cgcloud-core`` package::
+
+ virtualenv ~/cgcloud
+ source ~/cgcloud/bin/activate
+ pip install cgcloud-core
+
+* If you get ``DistributionNotFound: No distributions matching the version for
+ cgcloud-core``, try running ``pip install --pre cgcloud-core``.
+
+* If you get an error about ``yaml.h`` being missing you may need to install
+ libyaml (via HomeBrew on OS X) or libyaml-dev (via apt-get or yum on Linux).
+
+* If you get
+
+ ::
+
+ AttributeError: 'tuple' object has no attribute 'is_prerelease'
+
+ you may need to upgrade setuptools
+
+ ::
+
+ sudo pip install --upgrade setuptools
+
+* If you get
+
+ ::
+
+ ImportError: cannot import name cgcloud_version
+
+ you may need to upgrade virtualenv
+
+ ::
+
+ sudo pip install --upgrade virtualenv
+
+* If, on Mountain Lion, you get::
+
+ clang: error: unknown argument: '-mno-fused-madd' [-Wunused-command-line-argument-hard-error-in-future]
+ clang: note: this will be a hard error (cannot be downgraded to a warning) in the future
+ error: command 'clang' failed with exit status 1
+
+ try the following work-around::
+
+ export CFLAGS=-Qunused-arguments
+ export CPPFLAGS=-Qunused-arguments
+
+The installer places the ``cgcloud`` executable into the ``bin`` directory of
+the virtualenv. Before you can invoke ``cgcloud``, you have to activate the
+virtualenv as shown above. Alternatively, create a per-user bin directory and
+symlink the ``cgcloud`` executable into it::
+
+ deactivate
+ mkdir -p ~/bin
+ ln -snf ~/cgcloud/bin/cgcloud ~/bin
+
+After adding ``export PATH="$HOME/bin:$PATH"`` to your ``~/.profile``,
+``~/.bash_profile`` or ``~/.bashrc``, you won't need to explicitly activate the
+virtualenv before running cgcloud.
+
+You should be able to invoke ``cgcloud`` now::
+
+ cgcloud --help
+
+Auto-completion for Bash
+========================
+
+Install the awesome argcomplete_ module::
+
+ pip install argcomplete
+
+Then add the following command to your ``~/.profile``::
+
+ eval "$(/absolute/path/to/virtualenv/bin/register-python-argcomplete cgcloud)"
+
+.. _argcomplete: https://github.com/kislyuk/argcomplete
+
+Configuration
+=============
+
+Access keys
+-----------
+
+Ask your AWS admin to set up an IAM account in AWS for you. Log into Amazon's
+IAM console and generate an `access key`_ for yourself. While your IAM username
+and password are used to authenticate yourself for interactive use via the AWS
+console, the access key is used for programmatic access via ``cgcloud``.
+
+Once you have an access key, create ``~/.boto`` on your local computer with the
+following contents::
+
+ [Credentials]
+ aws_access_key_id = PASTE_YOUR_ACCESS_KEY_ID_HERE
+ aws_secret_access_key = PASTE_YOUR_SECRET_ACCESS_KEY_HERE
+
+
+The ``~/.boto`` file is being deprecated. Consider using ``~/.aws/credentials``
+instead. It is supported by various AWS SDKs and allows for easily switching
+between different AWS accounts (profiles)::
+
+ [foo]
+ aws_access_key_id=PASTE_YOUR_FOO_ACCESS_KEY_ID_HERE
+ aws_secret_access_key=PASTE_YOUR_FOO_SECRET_KEY_ID_HERE
+ region=us-west-2
+
+ [bar]
+ aws_access_key_id=PASTE_YOUR_BAR_ACCESS_KEY_ID_HERE
+ aws_secret_access_key=PASTE_YOUR_BAR_SECRET_KEY_ID_HERE
+ region=us-west-2
+
+To choose an active profile, set the ``AWS_PROFILE`` environment variable::
+
+ export AWS_PROFILE=foo
+
+.. _access key: http://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSGettingStartedGuide/AWSCredentials.html
+
+EC2 region and availability zone
+--------------------------------
+
+Edit your ``~/.profile`` or ``~/.bash_profile`` and add the following line::
+
+ export CGCLOUD_ZONE=us-west-2a
+
+This configures both the region ``us-west-2`` and the availability zone within
+that region: ``a``. Instead of ``us-west-2a`` you could use ``us-east-1a`` or
+any other zone in any other EC2 region.
+
+Public SSH key
+--------------
+
+If you don't have an SSH key, you can create one using the ``ssh-keygen``
+command. Do not use the EC2 console to generate a key. This would be insecure
+and produce a key that is incompatible with CGCloud.
+
+Register your SSH key in EC2 by running::
+
+ cgcloud register-key ~/.ssh/id_rsa.pub
+
+The above command imports the given public key to EC2 as a key pair (I know,
+the terminology is confusing) but also uploads it to S3; see the next paragraph for
+an explanation. The name of the key pair in EC2 will be set to your IAM user
+account name. In S3 the public key will be stored under its fingerprint.
+
+If cgcloud complains that the ``Private key file is encrypted``, your private
+key is probably encrypted with a passphrase (as it should). You need to add the
+key to the SSH agent via ``ssh-add`` which should prompt you for the
+passphrase. On Mac OS X this can be made more convenient by running ``ssh-add
+-K`` or ``ssh-add -K /path/to/private/key`` once. This will automatically add
+the key to the agent every time you log in. The passphrase will be stored in OS
+X's key chain so won't have to enter it again.
+
+
+Note: Importing your key pair using the EC2 console is not equivalent to
+``cgcloud register-key`` . In order to be able to manage key pairs within a
+team, CGCloud needs to know the contents of the public key for every team
+member's key pair. But EC2 only exposes a fingerprint via its REST API, not the
+actual public key. For this purpose, CGCloud maintains those public keys in a
+special S3 bucket. Using ``cgcloud register-key`` makes sure that the public
+key is imported to EC2 *and* uploaded to that special S3 bucket. Also note that
+while that S3 bucket is globally visible and the public keys stored therein
+apply across regions, the corresponding key pair in EC2 is only visible within
+a zone. So when you switch to a different region, you will have to use
+``cgcloud register-key`` again to import the key pair into that EC2 region.
+
+Multi-user SSH logins
+---------------------
+
+By default, CGCloud only injects your public key into the boxes that it
+creates. This means that only you can SSH into those boxes. If you want other
+people to be able to SSH into boxes created by you, you can specify a list of
+key pairs to be injected into boxes. You can do so as using the ``-k`` command
+line option to ``cgcloud create`` or by setting the ``CGCLOUD_KEYPAIRS``
+environment variable. The latter will inject those key pairs by default into
+every box that you create. The default for ``-k`` is the special string
+``__me__`` which is substituted with the name of the current IAM user, i.e.
+you. This only works if your IAM user account and your SSH key pair in EC2 have
+the same name, a practice that is highly recommended. The ``cgcloud
+register-key`` command follows that convention by default.
+
+The most useful shortcut for ``-k`` and ``CGCLOUD_KEYPAIRS`` however is to list
+the name of an IAM group by prefixing the group name with ``@@``. Assuming that
+there exists an IAM group called ``developers``, adding the following line to
+your ``.profile`` or ``.bash_profile``::
+
+ export CGCLOUD_KEYPAIRS="__me__ @@developers"
+
+will inject your own key pair and the key pair of every user in the
+``developers`` IAM group into every box that you create from that point
+onwards. Obviously, this only works if EC2 key pairs and IAM usernames are
+identical but as mentioned above, if you used ``cgcloud register-key`` this
+should be the case.
+
+In the above example, if a user is removed from the IAM group ``developers`` or
+if their key pair is deleted from EC2, their public key will be
+automatically removed from every box that was created with that value of
+``CGCLOUD_KEYPAIRS``.
+
+Note that a change to ``CGCLOUD_KEYPAIRS`` does not affect boxes created with
+``cgcloud recreate ROLE``. You will need to create a new image using ``cgcloud
+create -IT ROLE`` for the change to take effect.
+
+First steps
+===========
+
+You're now ready to create your first *box* aka EC2 instance or VM::
+
+ cgcloud create generic-ubuntu-trusty-box
+
+This creates an Ubuntu Trusty instance from a stock Ubuntu AMI and then further
+customizes it by running additional commands via SSH. It'll take a few minutes.
+The ``generic-ubuntu-trusty-box`` argument denotes a *role*, i.e. a blueprint
+for an instance. You can use ``cgcloud list-roles`` to see the available roles.
+
+Now login to the newly created box::
+
+ cgcloud ssh generic-ubuntu-trusty-box
+
+The astute reader will notice that it is not necessary to remember the public
+hostname assigned to the box. As long as there is only one box per role, you
+can refer to the box by using the role's name. Otherwise you will need to
+disambiguate by specifying an ordinal using the ``-o`` option. Use ``cgcloud
+list`` to view all running instances and their ordinals.
+
+Also note that it isn't necessary to specify the account name of the
+administrative user to log in as, e.g. ``ec2-user``, ``root`` or ``ubuntu``.
+The stock images for the various Linux distributions use different account
+names but CGCloud conveniently hides these differences.
+
+In order to copy files to and from the box you can use ``cgcloud rsync``::
+
+ cgcloud rsync generic-ubuntu-trusty-box -av ~/mystuff :
+
+The ``cgcloud rsync`` command behaves like a prefix to the ``rsync`` command
+with one important difference: With rsync you would specify the remote hostname
+followed by a colon, with ``cgcloud rsync`` you simply leave the hostname blank
+and only specify a colon followed by the remote path. If you omit the remote
+path, the home directory of the administrative user will be used.
+
+You can now stop the box with ``cgcloud stop``, start it again using ``cgcloud
+start`` or terminate it using ``cgcloud terminate``. Note that while a stopped
+instance is much cheaper than a running instance, it is not free. Only the
+``terminate`` command will reduce the operating cost incurred by the instance
+to zero.
+
+If you want to preserve the modifications you made to the box such that you can
+spawn another box in the future just like it, stop the box and then create an
+image of it using the ``cgcloud image`` command. You may then use the ``cgcloud
+recreate`` command to bring up a box.
+
+Philosophical remarks
+=====================
+
+While creating an image is a viable mechanism to preserve manual modifications
+to a box, it is not the best possible way. The problem with it is that you will
+be stuck with the base image release the box was created from. You will also be
+stuck with the customizations performed by the particular version of
+``cgcloud`` you were using. If either the base image or the role definition in
+CGCloud is updated, you will not benefit from those updates. Therefore, the
+preferred way of customizing a box is by *scripting* the customizations. This
+is typically done by creating a CGCloud plugin, i.e. a Python package with VM
+definitions aka ``roles``. A role is a subclass of the Box class while a box
+(aka VM aka EC2 instance) is an instance of that class. The prominent design
+patterns formed by Box and its derived classes are *Template Method* and
+*Mix-in*. The mix-in pattern introduces a sensitivity to Python's method
+resolution order so you need to be aware of that.
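+
+To make that concrete, here is a small, self-contained sketch of the
+cooperative ``super()`` calls that the mix-in pattern relies on (the class
+names and package lists below are made up for illustration and are not part
+of CGCloud, although ``_list_packages_to_install`` is one of the real
+template methods)::
+
+    class BaseBox( object ):
+        def _list_packages_to_install( self ):
+            return [ ]
+
+    class DockerMixin( BaseBox ):
+        def _list_packages_to_install( self ):
+            return super( DockerMixin, self )._list_packages_to_install( ) + [ 'docker.io' ]
+
+    class ToolsMixin( BaseBox ):
+        def _list_packages_to_install( self ):
+            return super( ToolsMixin, self )._list_packages_to_install( ) + [ 'git' ]
+
+    class MyBox( DockerMixin, ToolsMixin ):
+        # The MRO is MyBox, DockerMixin, ToolsMixin, BaseBox, object. Each
+        # super() call moves one step to the right in that order, so every
+        # mix-in gets a chance to contribute its packages.
+        pass
+
+    assert MyBox( )._list_packages_to_install( ) == [ 'git', 'docker.io' ]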
+
+Creating an image makes sense even if you didn't make any modifications after
+``cgcloud create``. It captures all role-specific customizations made by
+``cgcloud create``, thereby protecting them from changes in the role
+definition, the underlying base image and package updates in the Linux
+distribution used by the box. This is key to CGCloud's philosophy: It gives you
+a way to *create* an up-to-date image with all the latest software according to
+your requirements **and** it allows you to reliably reproduce the exact result of
+that step. The fact that ``recreate`` is much faster than ``create`` is icing
+on the cake.
+
+
+Building & Testing
+==================
+
+First, clone this repository and ``cd`` into it. To run the tests use
+
+* ``python setup.py nosetests --with-doctest``,
+* ``python setup.py test``,
+* ``nosetests`` or
+* ``python -m unittest discover -s src``.
+
+We prefer the way listed first as it installs all requirements **and** runs the
+tests under Nose, a test runner superior to ``unittest`` that can run tests in
+parallel and produces Xunit-like test reports. For example, on continuous
+integration we use
+
+::
+
+ virtualenv env
+ env/bin/python setup.py nosetests --processes=16 --process-timeout=900
+
+To make an editable_ install, also known as *development mode*, use ``python
+setup.py develop``. To remove the editable install ``python setup.py develop
+-u``.
+
+.. _editable: http://pythonhosted.org//setuptools/setuptools.html#development-mode
+
+Troubleshooting
+===============
+
+* If ``cgcloud create`` gets stuck repeatedly printing ``Private key file is
+ encrypted``, your private key is probably encrypted with a passphrase (as it
+ should). You need to add the key to the SSH agent via ``ssh-add`` which
+ should prompt you for the passphrase. On Mac OS X this can be made more
+ convenient by running ``ssh-add -K`` or ``ssh-add -K /path/to/private/key``
+ once. This will automatically add the key to the agent every time you log in.
+ The passphrase will be stored in OS X's keychain so you won't have to enter it
+ again.
+
+* If you get the following error::
+
+ ERROR: Exception: Incompatible ssh peer (no acceptable kex algorithm)
+ ERROR: Traceback (most recent call last):
+ ERROR: File "/usr/local/lib/python2.7/site-packages/paramiko/transport.py", line 1585, in run
+ ERROR: self._handler_table[ptype](self, m)
+ ERROR: File "/usr/local/lib/python2.7/site-packages/paramiko/transport.py", line 1664, in _negotiate_keys
+ ERROR: self._parse_kex_init(m)
+ ERROR: File "/usr/local/lib/python2.7/site-packages/paramiko/transport.py", line 1779, in _parse_kex_init
+ ERROR: raise SSHException('Incompatible ssh peer (no acceptable kex algorithm)')
+ ERROR: SSHException: Incompatible ssh peer (no acceptable kex algorithm)
+
+try upgrading paramiko::
+
+ pip install --upgrade paramiko
+
+See also https://github.com/fabric/fabric/issues/1212
+
+Customization
+=============
+
+CGCloud can be customized via plugins. A plugin is a Python module or package
+containing two functions::
+
+ def roles():
+ """
+ Return a list of roles, each role being a concrete subclass of
+ cgcloud.core.box.Box
+ """
+ return [ FooBox ]
+
+ def command_classes():
+ """
+ Return a list of command classes, each class being a concrete subclass of
+ cgcloud.lib.util.Command.
+ """
+ return [ FooCommand ]
+
+If the plugin is a Python package, these two functions need to be defined in
+its ``__init__.py``. The box and command classes returned by these two
+functions can be defined in submodules of that package.
+
+In order to be loaded by CGCloud, a plugin needs to be loadable from
+``sys.path`` and its module path (foo.bar.blah) needs to be mentioned in the
+``CGCLOUD_PLUGINS`` environment variable, which should contain a
+colon-separated list of plugin module paths.
+
+You can also run CGCloud with the ``--script`` option and a path to a Python
+script. The script will be handled like a plugin, except that it should not
+define a ``command_classes()`` function since that function will not be invoked
+for a script plugin. In other words, a script plugin should only define roles,
+not commands.
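+
+Putting it all together, a minimal plugin might look like the following sketch
+(the module is assumed to live in a file ``foo.py`` somewhere on ``sys.path``;
+the role name and the extra packages are purely illustrative)::
+
+    from cgcloud.core.generic_boxes import GenericUbuntuTrustyBox
+
+    class FooBox( GenericUbuntuTrustyBox ):
+        """
+        A custom role based on the stock Trusty box, with two extra packages.
+        """
+        def _list_packages_to_install( self ):
+            return super( FooBox, self )._list_packages_to_install( ) + [ 'git', 'tree' ]
+
+    def roles( ):
+        return [ FooBox ]
+
+    def command_classes( ):
+        return [ ]
+
+With ``CGCLOUD_PLUGINS=foo`` set in the environment, the new role should show
+up in ``cgcloud list-roles`` and can be created like any of the built-in roles.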
diff --git a/core/main.py b/core/main.py
new file mode 100755
index 0000000..24c7b31
--- /dev/null
+++ b/core/main.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python2.7
+
+import os
+
+import sys
+
+sys.path.append( os.path.join( os.path.dirname( __file__ ), 'src' ) )
+
+from cgcloud.core.cli import main
+
+if __name__ == "__main__":
+ main( )
diff --git a/core/setup.cfg b/core/setup.cfg
new file mode 100644
index 0000000..082e511
--- /dev/null
+++ b/core/setup.cfg
@@ -0,0 +1,5 @@
+[pytest]
+# Look for any python file, the default of test_*.py wouldn't work for us
+python_files=*.py
+# Also run doctests
+addopts = --doctest-modules
diff --git a/core/setup.py b/core/setup.py
new file mode 100644
index 0000000..72fd9dd
--- /dev/null
+++ b/core/setup.py
@@ -0,0 +1,32 @@
+from __future__ import absolute_import
+
+from setuptools import setup, find_packages
+
+from version import cgcloud_version, bd2k_python_lib_dep, boto_dep, fabric_dep
+
+setup( name='cgcloud-core',
+ version=cgcloud_version,
+
+ author='Hannes Schmidt',
+ author_email='hannes at ucsc.edu',
+ url='https://github.com/BD2KGenomics/cgcloud',
+ description='Efficient and reproducible software deployment for EC2 instances',
+
+ package_dir={ '': 'src' },
+ packages=find_packages( 'src', exclude=[ '*.test' ] ),
+ namespace_packages=[ 'cgcloud' ],
+ entry_points={
+ 'console_scripts': [
+ 'cgcloud = cgcloud.core.cli:main' ], },
+ install_requires=[ bd2k_python_lib_dep,
+ 'cgcloud-lib==' + cgcloud_version,
+ 'futures==3.0.4',
+ # such that cgcloud-lib can use the futures backport for its thread_pool
+ boto_dep,
+ fabric_dep,
+ 'paramiko==1.16.0',
+ 'futures==3.0.4',
+ 'PyYAML==3.11',
+ 'subprocess32==3.2.7',
+ 'tabulate==0.7.5'],
+ test_suite='cgcloud.core.test' )
diff --git a/core/src/cgcloud/__init__.py b/core/src/cgcloud/__init__.py
new file mode 100644
index 0000000..1148131
--- /dev/null
+++ b/core/src/cgcloud/__init__.py
@@ -0,0 +1 @@
+__import__( 'pkg_resources' ).declare_namespace( __name__ )
\ No newline at end of file
diff --git a/core/src/cgcloud/core/__init__.py b/core/src/cgcloud/core/__init__.py
new file mode 100644
index 0000000..31714dd
--- /dev/null
+++ b/core/src/cgcloud/core/__init__.py
@@ -0,0 +1,50 @@
+from cgcloud.core.deprecated import is_deprecated
+
+
+def __fail_deprecated( artifacts ):
+ for artifact in artifacts:
+ if is_deprecated( artifact ):
+ raise DeprecationWarning( artifact )
+ return artifacts
+
+
+def roles( ):
+ from cgcloud.core.generic_boxes import (GenericCentos6Box,
+ GenericUbuntuPreciseBox,
+ GenericUbuntuTrustyBox,
+ GenericUbuntuVividBox,
+ GenericFedora21Box,
+ GenericFedora22Box)
+ return __fail_deprecated( sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) )
+
+
+def command_classes( ):
+ from cgcloud.core.commands import (ListRolesCommand,
+ CreateCommand,
+ RecreateCommand,
+ StartCommand,
+ StopCommand,
+ RebootCommand,
+ TerminateCommand,
+ ImageCommand,
+ ShowCommand,
+ SshCommand,
+ RsyncCommand,
+ ListCommand,
+ ListImagesCommand,
+ DeleteImageCommand,
+ RegisterKeyCommand,
+ CleanupCommand,
+ UpdateInstanceProfile,
+ ResetSecurityCommand,
+ ListOptionsCommand)
+ from cgcloud.core.cluster_commands import (CreateClusterCommand,
+ StartClusterCommand,
+ StopClusterCommand,
+ TerminateClusterCommand,
+ SshClusterCommand,
+ RsyncClusterCommand,
+ GrowClusterCommand)
+ return __fail_deprecated( sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) )
+
+
diff --git a/core/src/cgcloud/core/agent_box.py b/core/src/cgcloud/core/agent_box.py
new file mode 100644
index 0000000..3a1ffae
--- /dev/null
+++ b/core/src/cgcloud/core/agent_box.py
@@ -0,0 +1,185 @@
+import base64
+import zlib
+from bd2k.util.iterables import concat
+
+from fabric.context_managers import settings
+from fabric.operations import run
+
+from bd2k.util import shell, less_strict_bool
+from bd2k.util.strings import interpolate as fmt
+
+from cgcloud.core.init_box import AbstractInitBox
+from cgcloud.core.common_iam_policies import *
+from cgcloud.fabric.operations import sudo, pip
+from cgcloud.core.package_manager_box import PackageManagerBox
+from cgcloud.lib.util import abreviated_snake_case_class_name
+from cgcloud.core.box import fabric_task
+
+
+class AgentBox( PackageManagerBox, AbstractInitBox ):
+ """
+ A box on which to install the agent.
+ """
+
+ def other_accounts( self ):
+ """
+ Returns the names of accounts for which, in addition to the account returned by
+ Box.username(), authorized SSH keys should be managed by this agent.
+ """
+ return [ ]
+
+ agent_depends_on_pycrypto = False
+
+ def __init__( self, ctx ):
+ super( AgentBox, self ).__init__( ctx )
+ self._enable_agent = None
+
+ @property
+ def enable_agent( self ):
+ if self._enable_agent is None:
+ raise RuntimeError(
+ "Enable_agent property hasn't been set. Must call _set_instance_options() before "
+ "using this instance." )
+ return self._enable_agent
+
+ def _set_instance_options( self, options ):
+ super( AgentBox, self )._set_instance_options( options )
+ self._enable_agent = less_strict_bool( options.get( 'enable_agent' ) )
+
+ def _get_instance_options( self ):
+ return self.__get_options( super( AgentBox, self )._get_instance_options( ) )
+
+ def _get_image_options( self ):
+ return self.__get_options( super( AgentBox, self )._get_image_options( ) )
+
+ def __get_options( self, options ):
+ return dict( options, enable_agent=str( self.enable_agent ) )
+
+ def _manages_keys_internally( self ):
+ return self.enable_agent
+
+ def _list_packages_to_install( self ):
+ packages = super( AgentBox, self )._list_packages_to_install( )
+ if self.enable_agent:
+ packages += [
+ 'python',
+ 'python-pip' ]
+ if self.agent_depends_on_pycrypto:
+ packages += [
+ 'python-dev',
+ 'autoconf',
+ 'automake',
+ 'binutils',
+ 'gcc',
+ 'make' ]
+ return packages
+
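The package list is assembled cooperatively along the class hierarchy: every class calls super()._list_packages_to_install() and appends what it needs, as AgentBox does above for the agent and its optional pycrypto build dependencies. A minimal sketch of a hypothetical subclass adding packages of its own:

    class MonitoringBox( AgentBox ):  # hypothetical role, for illustration only
        def _list_packages_to_install( self ):
            return super( MonitoringBox, self )._list_packages_to_install( ) + [
                'htop',
                'sysstat' ]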
+ @fabric_task
+ def _post_install_packages( self ):
+ super( AgentBox, self )._post_install_packages( )
+ if self.enable_agent:
+ self.__setup_agent( )
+
+ def _enable_agent_metrics( self ):
+ """
+ Override this in a subclass to enable reporting of additional CloudWatch metrics like disk
+ space and memory. The metric collection requires the psutil package which in turn
+ requires a compiler and Python headers to be installed.
+ """
+ return False
+
+ def __setup_agent( self ):
+ availability_zone = self.ctx.availability_zone
+ namespace = self.ctx.namespace
+ ec2_keypair_globs = ' '.join( shell.quote( _ ) for _ in self.ec2_keypair_globs )
+ accounts = ' '.join( [ self.admin_account( ) ] + self.other_accounts( ) )
+ admin_account = self.admin_account( )
+ run_dir = '/var/run/cgcloudagent'
+ log_dir = '/var/log'
+ install_dir = '/opt/cgcloudagent'
+
+ # Lucid & CentOS 5 have an ancient pip
+ pip( 'install --upgrade pip==1.5.2', use_sudo=True )
+ pip( 'install --upgrade virtualenv', use_sudo=True )
+ sudo( fmt( 'mkdir -p {install_dir}' ) )
+ sudo( fmt( 'chown {admin_account}:{admin_account} {install_dir}' ) )
+ # By default, virtualenv installs the latest version of pip. We want a specific
+ # version, so we tell virtualenv not to install pip and then install that version of
+ # pip using easy_install.
+ run( fmt( 'virtualenv --no-pip {install_dir}' ) )
+ run( fmt( '{install_dir}/bin/easy_install pip==1.5.2' ) )
+
+ with settings( forward_agent=True ):
+ venv_pip = install_dir + '/bin/pip'
+ if self._enable_agent_metrics( ):
+ pip( path=venv_pip, args='install psutil==3.4.1' )
+ with self._project_artifacts( 'agent' ) as artifacts:
+ pip( path=venv_pip,
+ args=concat( 'install',
+ '--allow-external', 'argparse', # needed on CentOS 5 and 6
+ artifacts ) )
+
+ sudo( fmt( 'mkdir {run_dir}' ) )
+ script = self.__gunzip_base64_decode( run( fmt(
+ '{install_dir}/bin/cgcloudagent'
+ ' --init-script'
+ ' --zone {availability_zone}'
+ ' --namespace {namespace}'
+ ' --accounts {accounts}'
+ ' --keypairs {ec2_keypair_globs}'
+ ' --user root'
+ ' --group root'
+ ' --pid-file {run_dir}/cgcloudagent.pid'
+ ' --log-spill {log_dir}/cgcloudagent.out'
+ '| gzip -c | base64' ) ) )
+ self._register_init_script( 'cgcloudagent', script )
+ self._run_init_script( 'cgcloudagent' )
+
+ def _get_iam_ec2_role( self ):
+ iam_role_name, policies = super( AgentBox, self )._get_iam_ec2_role( )
+ if self.enable_agent:
+ iam_role_name += '--' + abreviated_snake_case_class_name( AgentBox )
+ policies.update( dict(
+ ec2_read_only=ec2_read_only_policy,
+ s3_read_only=s3_read_only_policy,
+ iam_read_only=iam_read_only_policy,
+ sqs_agent=dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="*", Action=[
+ "sqs:Get*",
+ "sqs:List*",
+ "sqs:CreateQueue",
+ "sqs:SetQueueAttributes",
+ "sqs:ReceiveMessage",
+ "sqs:DeleteMessage" ] ) ] ),
+ sns_agent=dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="*", Action=[
+ "sns:Get*",
+ "sns:List*",
+ "sns:CreateTopic",
+ "sns:Subscribe" ] ) ] ),
+ cloud_watch=dict( Version='2012-10-17', Statement=[
+ dict( Effect='Allow', Resource='*', Action=[
+ 'cloudwatch:Get*',
+ 'cloudwatch:List*',
+ 'cloudwatch:PutMetricData' ] ) ] ) ) )
+ return iam_role_name, policies
+
+ @staticmethod
+ def __gunzip_base64_decode( s ):
+ """
+ Fabric doesn't have get( ..., use_sudo=True ) [1] so we need to use
+
+ sudo( 'cat ...' )
+
+ to download protected files. However it also munges line endings [2] so to be safe we
+
+ sudo( 'cat ... | gzip | base64' )
+
+ and this method unravels that.
+
+ [1]: https://github.com/fabric/fabric/issues/700
+ [2]: https://github.com/trehn/blockwart/issues/39
+ """
+ # See http://stackoverflow.com/questions/2695152/in-python-how-do-i-decode-gzip-encoding#answer-2695466
+ # for the scoop on 16 + zlib.MAX_WBITS.
+ return zlib.decompress( base64.b64decode( s ), 16 + zlib.MAX_WBITS )
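The same cat-gzip-base64 round trip works for any root-owned file that needs to be read back through Fabric. A minimal sketch, assuming it is called from within a @fabric_task method so that sudo() targets the box (the helper name and file path are illustrative):

    import base64, zlib
    from cgcloud.fabric.operations import sudo

    def read_protected_file( path ):
        # gzip defeats the line-ending munging, base64 keeps the transfer terminal-safe
        encoded = sudo( "cat %s | gzip -c | base64" % path )
        return zlib.decompress( base64.b64decode( encoded ), 16 + zlib.MAX_WBITS )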
diff --git a/core/src/cgcloud/core/apache.py b/core/src/cgcloud/core/apache.py
new file mode 100644
index 0000000..a497d79
--- /dev/null
+++ b/core/src/cgcloud/core/apache.py
@@ -0,0 +1,65 @@
+import json
+import logging
+import os
+
+from bd2k.util.strings import interpolate as fmt
+from fabric.operations import run
+
+from cgcloud.core.box import Box
+from cgcloud.fabric.operations import sudo
+
+log = logging.getLogger( __name__ )
+
+
+class ApacheSoftwareBox( Box ):
+ """
+ A box to be mixed in to ease the hassle of installing Apache Software
+ Foundation released software distros.
+ """
+
+ def _install_apache_package( self, remote_path, install_dir ):
+ """
+ Download the given package from an Apache download mirror and extract it to a child
+ directory of the directory at the given path.
+
+ :param str remote_path: the URL path of the package on the Apache download server and its
+ mirrors.
+
+ :param str install_dir: The path to a directory on the box in which to create the directory
+ containing the extracted package.
+ """
+ # TODO: run Fabric tasks with a different manager, so we don't need to catch SystemExit
+ components = remote_path.split( '/' )
+ package, tarball = components[ 0 ], components[ -1 ]
+ # Some mirrors may be down or serve crap, so we may need to retry this a couple of times.
+ tries = iter( xrange( 3 ) )
+ while True:
+ try:
+ mirror_url = self.__apache_s3_mirror_url( remote_path )
+ if run( "curl -Ofs '%s'" % mirror_url, warn_only=True ).failed:
+ mirror_url = self.__apache_official_mirror_url( remote_path )
+ run( "curl -Ofs '%s'" % mirror_url )
+ try:
+ sudo( fmt( 'mkdir -p {install_dir}/{package}' ) )
+ sudo( fmt( 'tar -C {install_dir}/{package} '
+ '--strip-components=1 -xzf {tarball}' ) )
+ return
+ finally:
+ run( fmt( 'rm {tarball}' ) )
+ except SystemExit:
+ if next( tries, None ) is None:
+ raise
+ else:
+ log.warn( "Could not download or extract the package, retrying ..." )
+
+ def __apache_official_mirror_url( self, remote_path ):
+ url = 'http://www.apache.org/dyn/closer.cgi?path=%s&asjson=1' % remote_path
+ mirrors = run( "curl -fs '%s'" % url )
+ mirrors = json.loads( mirrors )
+ mirror = mirrors[ 'preferred' ]
+ url = mirror + remote_path
+ return url
+
+ def __apache_s3_mirror_url( self, remote_path ):
+ file_name = os.path.basename( remote_path )
+ return 'https://s3-us-west-2.amazonaws.com/bd2k-artifacts/cgcloud/' + file_name
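A box that mixes in ApacheSoftwareBox typically calls _install_apache_package() from one of its @fabric_task methods, passing the mirror-relative path of the release tarball. A minimal sketch (the class name, release path and install directory are illustrative):

    from cgcloud.core.apache import ApacheSoftwareBox
    from cgcloud.core.box import fabric_task

    class SparkExampleBox( ApacheSoftwareBox ):  # hypothetical, for illustration only
        @fabric_task
        def _install_spark( self ):
            self._install_apache_package(
                remote_path='spark/spark-1.6.2/spark-1.6.2-bin-hadoop2.6.tgz',
                install_dir='/opt' )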
diff --git a/core/src/cgcloud/core/box.py b/core/src/cgcloud/core/box.py
new file mode 100644
index 0000000..88ce79d
--- /dev/null
+++ b/core/src/cgcloud/core/box.py
@@ -0,0 +1,1634 @@
+import datetime
+import hashlib
+import socket
+# cluster ssh and rsync commands need thread-safe subprocess
+import subprocess32
+import threading
+import time
+from StringIO import StringIO
+from abc import ABCMeta, abstractmethod
+from collections import namedtuple, Iterator
+from contextlib import closing, contextmanager
+from copy import copy
+from functools import partial, wraps
+from itertools import count, izip
+from operator import attrgetter
+from pipes import quote
+
+from bd2k.util.collections import OrderedSet
+from bd2k.util.exceptions import panic
+from bd2k.util.expando import Expando
+from bd2k.util.iterables import concat
+from boto import logging
+from boto.ec2.blockdevicemapping import BlockDeviceType, BlockDeviceMapping
+from boto.ec2.instance import Instance
+from boto.ec2.spotpricehistory import SpotPriceHistory
+from boto.exception import BotoServerError, EC2ResponseError
+from fabric.api import execute
+from fabric.context_managers import settings
+from fabric.operations import sudo, run, get, put
+from paramiko import SSHClient
+from paramiko.client import MissingHostKeyPolicy
+
+from cgcloud.core.project import project_artifacts
+from cgcloud.lib import aws_d32
+from cgcloud.lib.context import Context
+from cgcloud.lib.ec2 import (ec2_instance_types,
+ wait_instances_running,
+ inconsistencies_detected,
+ create_spot_instances,
+ create_ondemand_instances,
+ tag_object_persistently)
+from cgcloud.lib.ec2 import retry_ec2, a_short_time, a_long_time, wait_transition
+from cgcloud.lib.util import (UserError,
+ camel_to_snake,
+ ec2_keypair_fingerprint,
+ private_to_public_key,
+ mean, std_dev)
+
+log = logging.getLogger( __name__ )
+
+
+# noinspection PyPep8Naming
+class fabric_task( object ):
+ # A stack to stash the current fabric user before a new one is set via this decorator
+ user_stack = [ ]
+ # A reentrant lock to prevent multiple concurrent uses of fabric, which is not thread-safe
+ lock = threading.RLock( )
+
+ def __new__( cls, user=None ):
+ if callable( user ):
+ return cls( )( user )
+ else:
+ return super( fabric_task, cls ).__new__( cls )
+
+ def __init__( self, user=None ):
+ self.user = user
+
+ def __call__( self, function ):
+ @wraps( function )
+ def wrapper( box, *args, **kwargs ):
+ with self.lock:
+ user = box.admin_account( ) if self.user is None else self.user
+ user_stack = self.user_stack
+ if user_stack and user_stack[ -1 ] == user:
+ return function( box, *args, **kwargs )
+ else:
+ user_stack.append( user )
+ try:
+ task = partial( function, box, *args, **kwargs )
+ task.name = function.__name__
+ # noinspection PyProtectedMember
+ return box._execute_task( task, user )
+ finally:
+ assert user_stack.pop( ) == user
+
+ return wrapper
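In practice the decorator is used either bare, in which case the wrapped method runs as the box's admin account, or with an explicit user. A minimal sketch of both forms on a hypothetical Box subclass (its remaining abstract methods are omitted):

    class ExampleBox( Box ):  # hypothetical, for illustration only
        @fabric_task                 # commands run as self.admin_account( )
        def _install_tree( self ):
            sudo( 'apt-get -q -y install tree' )

        @fabric_task( user='root' )  # commands run as root instead
        def _touch_marker( self ):
            run( 'touch /root/cgcloud-example' )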
+
+
+class Box( object ):
+ """
+ Manage EC2 instances. Each instance of this class represents a single virtual machine (aka
+ instance) in EC2.
+ """
+
+ __metaclass__ = ABCMeta
+
+ @classmethod
+ def role( cls ):
+ """
+ The name of the role performed by instances of this class, or rather by the EC2 instances
+ they represent.
+ """
+ return camel_to_snake( cls.__name__, '-' )
+
+ @abstractmethod
+ def admin_account( self ):
+ """
+ Returns the name of a user that has sudo privileges. All administrative commands on the
+ box are invoked via SSH as this user.
+ """
+ raise NotImplementedError( )
+
+ def default_account( self ):
+ """
+ Returns the name of the user with which interactive SSH sessions are started on the box.
+ The default implementation forwards to self.admin_account().
+ """
+ return self.admin_account( )
+
+ def _image_name_prefix( self ):
+ """
+ Returns the prefix to be used for naming images created from this box
+ """
+ return self.role( )
+
+ class NoSuchImageException( RuntimeError ):
+ pass
+
+ @abstractmethod
+ def _base_image( self, virtualization_type ):
+ """
+ Returns the default base image that boxes performing this role should be booted from
+ before they are set up
+
+ :rtype: boto.ec2.image.Image
+
+ :raises Box.NoSuchImageException: if no image exists for this role and the given
+ virtualization type
+ """
+ raise NotImplementedError( )
+
+ @abstractmethod
+ def setup( self, **kwargs ):
+ """
+ Create the EC2 instance represented by this box, install the OS and additional packages on it,
+ optionally create an AMI image of it, and/or terminate it.
+ """
+ raise NotImplementedError( )
+
+ @abstractmethod
+ def _ephemeral_mount_point( self, i ):
+ """
+ Returns the absolute path to the directory at which the i-th ephemeral volume is mounted
+ or None if no such mount point exists. Note that there must always be a mountpoint for
+ the first volume, so this method always returns a value other than None if i is 0. We
+ have this method because the mount point typically depends on the distribution, and even
+ on the author of the image.
+ """
+ raise NotImplementedError( )
+
+ def _manages_keys_internally( self ):
+ """
+ Returns True if this box manages its own keypair, e.g. via the agent.
+ """
+ return False
+
+ def _populate_ec2_keypair_globs( self, ec2_keypair_globs ):
+ """
+ Populate the given list with keypair globs defining the set of keypairs whose public
+ component will be deployed to this box.
+
+ :param ec2_keypair_globs: the suggested list of globs, to be modified in place
+ """
+ pass
+
+ def __init__( self, ctx ):
+ """
+ Before invoking any methods on this object,
+ you must ensure that a corresponding EC2 instance exists by calling either
+
+ * prepare() and create()
+ * bind()
+
+ :type ctx: Context
+ """
+
+ # The context to be used by the instance
+ self.ctx = ctx
+
+ # The image the instance was or will be booted from
+ self.image_id = None
+
+ # The SSH key pairs to be injected into the instance.
+ self.ec2_keypairs = None
+
+ # The globs from which to derive the SSH key pairs to be injected into the instance
+ self.ec2_keypair_globs = None
+
+ # The instance represented by this box
+ self.instance = None
+
+ # The number of previous generations of this box. When an instance is booted from a stock
+ # AMI, generation is 0. After that instance is set up and imaged and another instance is
+ # booted from the resulting AMI, generation will be 1.
+ self.generation = None
+
+ # The ordinal of this box within a cluster of boxes. For boxes that don't join a cluster,
+ # this will be 0
+ self.cluster_ordinal = None
+
+ # The name of the cluster this box is a node of, or None if this box is not in a cluster.
+ self.cluster_name = None
+
+ # Role-specific options for this box
+ self.role_options = { }
+
+ @property
+ def instance_id( self ):
+ return self.instance and self.instance.id
+
+ @property
+ def ip_address( self ):
+ return self.instance and self.instance.ip_address
+
+ @property
+ def private_ip_address( self ):
+ """
+ Set by bind() and create(), the private IP address of this instance
+ """
+ return self.instance and self.instance.private_ip_address
+
+ @property
+ def host_name( self ):
+ return self.instance and self.instance.public_dns_name
+
+ @property
+ def launch_time( self ):
+ return self.instance and self.instance.launch_time
+
+ @property
+ def state( self ):
+ return self.instance and self.instance.state
+
+ @property
+ def zone( self ):
+ return self.instance and self.instance.placement
+
+ @property
+ def role_name( self ):
+ return self.role( )
+
+ @property
+ def instance_type( self ):
+ return self.instance and self.instance.instance_type
+
+ possible_root_devices = ('/dev/sda1', '/dev/sda', '/dev/xvda')
+
+ # FIXME: this can probably be rolled into prepare()
+
+ def _spec_block_device_mapping( self, spec, image ):
+ """
+ Add, remove or modify the keyword arguments that will be passed to the EC2 run_instances
+ request.
+
+ :type image: boto.ec2.image.Image
+ :type spec: dict
+ """
+ for root_device in self.possible_root_devices:
+ root_bdt = image.block_device_mapping.get( root_device )
+ if root_bdt:
+ root_bdt.size = 10
+ root_bdt.snapshot_id = None
+ root_bdt.encrypted = None
+ root_bdt.delete_on_termination = True
+ bdm = spec.setdefault( 'block_device_map', BlockDeviceMapping( ) )
+ bdm[ '/dev/sda1' ] = root_bdt
+ for i in range( ec2_instance_types[ spec[ 'instance_type' ] ].disks ):
+ device = '/dev/sd' + chr( ord( 'b' ) + i )
+ bdm[ device ] = BlockDeviceType( ephemeral_name='ephemeral%i' % i )
+ return
+ raise RuntimeError( "Can't determine root volume from image" )
+
+ def __select_image( self, image_ref ):
+ if isinstance( image_ref, int ):
+ images = self.list_images( )
+ try:
+ return images[ image_ref ]
+ except IndexError:
+ raise UserError( "No image with ordinal %i for role %s"
+ % (image_ref, self.role( )) )
+ else:
+ return self.ctx.ec2.get_image( image_ref )
+
+ def _security_group_name( self ):
+ """
+ Override the security group name to be used for this box
+ """
+ return self.role( )
+
+ def __setup_security_groups( self, vpc_id=None ):
+ log.info( 'Setting up security group ...' )
+ name = self.ctx.to_aws_name( self._security_group_name( ) )
+ try:
+ sg = self.ctx.ec2.create_security_group(
+ name=name,
+ vpc_id=vpc_id,
+ description="Security group for box of role %s in namespace %s" % (
+ self.role( ), self.ctx.namespace) )
+ except EC2ResponseError as e:
+ if e.error_code == 'InvalidGroup.Duplicate':
+ filters = { 'group-name': name }
+ if vpc_id is not None:
+ filters[ 'vpc-id' ] = vpc_id
+ for attempt in retry_ec2( retry_while=inconsistencies_detected,
+ retry_for=10 * 60 ):
+ with attempt:
+ sgs = self.ctx.ec2.get_all_security_groups( filters=filters )
+ assert len( sgs ) == 1
+ sg = sgs[ 0 ]
+ else:
+ raise
+ # It's OK to have two security groups of the same name as long as their VPC is distinct.
+ assert vpc_id is None or sg.vpc_id == vpc_id
+ rules = self._populate_security_group( sg.id )
+ for rule in rules:
+ try:
+ for attempt in retry_ec2( retry_while=inconsistencies_detected,
+ retry_for=10 * 60 ):
+ with attempt:
+ assert self.ctx.ec2.authorize_security_group( group_id=sg.id, **rule )
+ except EC2ResponseError as e:
+ if e.error_code == 'InvalidPermission.Duplicate':
+ pass
+ else:
+ raise
+ # FIXME: What about stale rules? I tried writing code that removes them but gave up. The
+ # API in both boto and EC2 is just too brain-dead.
+ log.info( '... finished setting up %s.', sg.id )
+ return [ sg.id ]
+
+ def _populate_security_group( self, group_id ):
+ """
+ :return: A list of rules, each rule is a dict with keyword arguments to
+ boto.ec2.connection.EC2Connection.authorize_security_group, namely
+
+ ip_protocol
+ from_port
+ to_port
+ cidr_ip
+ src_security_group_name
+ src_security_group_owner_id
+ src_security_group_group_id
+ """
+ return [
+ dict( ip_protocol='tcp', from_port=22, to_port=22, cidr_ip='0.0.0.0/0' ),
+ # This is necessary to allow PMTUD. A common symptom for PMTUD not working is that
+ # TCP connections hang after a certain constant amount of data has been transferred
+ # if the connection is between the instance and a host with jumbo frames enabled.
+ dict( ip_protocol='icmp', from_port=3, to_port=4, cidr_ip='0.0.0.0/0' ) ]
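Subclasses open additional ports by extending the rule list returned by this base implementation. A minimal sketch for a hypothetical role that also serves HTTP (the class name is illustrative and its abstract methods are omitted):

    class WebExampleBox( Box ):  # hypothetical, for illustration only
        def _populate_security_group( self, group_id ):
            return super( WebExampleBox, self )._populate_security_group( group_id ) + [
                dict( ip_protocol='tcp', from_port=80, to_port=80, cidr_ip='0.0.0.0/0' ) ]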
+
+ def __get_virtualization_types( self, instance_type, requested_vtype=None ):
+ instance_vtypes = OrderedSet( ec2_instance_types[ instance_type ].virtualization_types )
+ role_vtypes = OrderedSet( self.supported_virtualization_types( ) )
+ supported_vtypes = instance_vtypes & role_vtypes
+ if supported_vtypes:
+ if requested_vtype is None:
+ virtualization_types = list( supported_vtypes )
+ else:
+ if requested_vtype in supported_vtypes:
+ virtualization_types = [ requested_vtype ]
+ else:
+ raise UserError( 'Virtualization type %s not supported by role %s and instance '
+ 'type %s' % (requested_vtype, self.role( ), instance_type) )
+ else:
+ raise RuntimeError( 'Cannot find any virtualization types supported by both role '
+ '%s and instance type %s' % (self.role( ), instance_type) )
+
+ return virtualization_types
+
+ def __get_image( self, virtualization_types, image_ref=None ):
+ if image_ref is None:
+ for virtualization_type in virtualization_types:
+ log.info( "Looking up default image for role %s and virtualization type %s, ... ",
+ self.role( ), virtualization_type )
+ try:
+ image = self._base_image( virtualization_type )
+ except self.NoSuchImageException as e:
+ log.info( "... %s", e.message )
+ else:
+ log.info( "... found %s.", image.id )
+ assert (image.virtualization_type in virtualization_types)
+ return image
+ raise RuntimeError( "Could not find suitable image for role %s", self.role( ) )
+ else:
+ image = self.__select_image( image_ref )
+ if image.virtualization_type not in virtualization_types:
+ raise RuntimeError(
+ "Role and type support virtualization types %s but image only supports %s" % (
+ virtualization_types, image.virtualization_type) )
+ return image
+
+ # Note: The name of all spot-related keyword arguments should begin with 'spot_'
+
+ def prepare( self, ec2_keypair_globs,
+ instance_type=None, image_ref=None, virtualization_type=None,
+ spot_bid=None, spot_launch_group=None, spot_auto_zone=False,
+ vpc_id=None, subnet_id=None,
+ **options ):
+ """
+ Prepare to create an EC2 instance represented by this box. Return a dictionary with
+ keyword arguments to boto.ec2.connection.EC2Connection.run_instances() that can be used
+ to create the instance.
+
+ :param list[str] ec2_keypair_globs: The names of EC2 keypairs whose public key is to be
+ injected into the instance to facilitate SSH logins. For the first listed keypair a
+ matching private key needs to be present locally. Note that after the agent is installed
+ on the box it will manage the authorized keys on its own.
+
+ :param str instance_type: The type of instance to create, e.g. m1.small or t1.micro.
+
+ :param int|str image_ref: The ordinal or AMI ID of the image to boot from. If None,
+ the return value of self._base_image() will be used.
+
+ :param str virtualization_type: The desired virtualization type to use for the instance
+
+ :param float spot_bid: Dollar amount to bid for spot instances. If None, an on-demand
+ instance will be created
+
+ :param str spot_launch_group: Specify a launch group in your Spot instance request to tell
+ Amazon EC2 to launch a set of Spot instances only if it can launch them all. In addition,
+ if the Spot service must terminate one of the instances in a launch group (for example,
+ if the Spot price rises above your bid price), it must terminate them all.
+
+ :param bool spot_auto_zone: Use heuristic to automatically choose the "best" availability
+ zone to launch spot instances in. Can't be combined with subnet_id. Overrides the
+ availability zone in the context.
+
+ :param str vpc_id: The ID of a VPC to create the instance and associated security group
+ in. If this argument is None or absent and the AWS account has a default VPC, the default
+ VPC will be used. This is the most common case. If this argument is None or absent and
+ the AWS account has EC2 Classic enabled and the selected instance type supports EC2
+ classic mode, no VPC will be used. If this argument is None or absent and the AWS account
+ has no default VPC and an instance type that only supports VPC is used, an exception will
+ be raised.
+
+ :param str subnet_id: The ID of a subnet to allocate the instance's private IP address from.
+ Can't be combined with spot_auto_zone. The specified subnet must belong to the specified
+ VPC (or the default VPC if none was specified) and reside in the context's availability
+ zone. If this argument is None or absent, a subnet will be chosen automatically.
+
+ :param dict options: Additional, role-specific options can be specified. These options
+ augment the options associated with the given image.
+ """
+ if spot_launch_group is not None and spot_bid is None:
+ raise UserError( 'Need a spot bid when specifying a launch group for spot instances' )
+
+ if spot_auto_zone and spot_bid is None:
+ raise UserError( 'Need a spot bid for automatically choosing a zone for spot instances' )
+
+ if subnet_id is not None and spot_auto_zone:
+ raise UserError( 'Cannot automatically choose an availability zone for spot instances '
+ 'while placing them in an explicitly defined subnet since the subnet '
+ 'implies a specific availability zone.' )
+
+ if self.instance_id is not None:
+ raise AssertionError( 'Instance already bound or created' )
+
+ if instance_type is None:
+ instance_type = self.recommended_instance_type( )
+
+ virtualization_types = self.__get_virtualization_types( instance_type, virtualization_type )
+ image = self.__get_image( virtualization_types, image_ref )
+ self.image_id = image.id
+
+ zone = self.ctx.availability_zone
+
+ security_group_ids = self.__setup_security_groups( vpc_id=vpc_id )
+ if vpc_id is not None and subnet_id is None:
+ log.info( 'Looking up suitable subnet for VPC %s in zone %s.', vpc_id, zone )
+ subnets = self.ctx.vpc.get_all_subnets( filters={ 'vpc-id': vpc_id,
+ 'availability-zone': zone } )
+ if subnets:
+ subnet_id = subnets[ 0 ].id
+ else:
+ raise UserError( 'There is no subnet belonging to VPC %s in availability zone %s. '
+ 'Please create a subnet manually using the VPC console.'
+ % (vpc_id, zone) )
+
+ options = dict( image.tags, **options )
+ self._set_instance_options( options )
+
+ self._populate_ec2_keypair_globs( ec2_keypair_globs )
+ ec2_keypairs = self.ctx.expand_keypair_globs( ec2_keypair_globs )
+ if not ec2_keypairs:
+ raise UserError( "No key pairs matching '%s' found." % ' '.join( ec2_keypair_globs ) )
+ if ec2_keypairs[ 0 ].name != ec2_keypair_globs[ 0 ]:
+ raise UserError( "The first key pair name can't be a glob." )
+ self.ec2_keypairs = ec2_keypairs
+ self.ec2_keypair_globs = ec2_keypair_globs
+
+ spec = Expando( instance_type=instance_type,
+ key_name=ec2_keypairs[ 0 ].name,
+ placement=zone,
+ security_group_ids=security_group_ids,
+ subnet_id=subnet_id,
+ instance_profile_arn=self.get_instance_profile_arn( ) )
+ self._spec_block_device_mapping( spec, image )
+ self._spec_spot_market( spec,
+ bid=spot_bid,
+ launch_group=spot_launch_group,
+ auto_zone=spot_auto_zone )
+ return spec
+
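Taken together, prepare() and create() form a two-step launch protocol: prepare() resolves the image, keypairs, security groups and block device mapping into a spec for run_instances() or request_spot_instances(), and create() submits that spec. A minimal sketch, assuming a Context has already been constructed and a matching EC2 keypair is registered (the keypair name is illustrative):

    from cgcloud.core.generic_boxes import GenericUbuntuTrustyBox

    box = GenericUbuntuTrustyBox( ctx )          # ctx: a cgcloud.lib.context.Context (assumed)
    spec = box.prepare( ec2_keypair_globs=[ 'my-keypair' ],
                        instance_type='t2.micro' )
    boxes = box.create( spec, wait_ready=True )  # returns [ box ] once the instance is ready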
+ def _spec_spot_market( self, spec, bid, launch_group, auto_zone ):
+ if bid is not None:
+ if not ec2_instance_types[ spec.instance_type ].spot_availability:
+ raise UserError( 'The instance type %s is not available on the spot market.' %
+ spec.instance_type )
+ if auto_zone:
+ spec.placement = self._optimize_spot_bid( spec.instance_type, bid )
+ spec.price = bid
+ if launch_group is not None:
+ spec.launch_group = self.ctx.to_aws_name( launch_group )
+
+ ZoneTuple = namedtuple( 'ZoneTuple', [ 'name', 'price_deviation' ] )
+
+ @classmethod
+ def _choose_spot_zone( cls, zones, bid, spot_history ):
+ """
+ Returns the zone in which to place the spot request, preferring, in order of priority:
+
+ 1) zones with prices currently under the bid
+
+ 2) zones with the most stable price
+
+ :param list[boto.ec2.zone.Zone] zones:
+ :param float bid:
+ :param list[boto.ec2.spotpricehistory.SpotPriceHistory] spot_history:
+
+ :rtype: str
+ :return: the name of the selected zone
+
+ >>> from collections import namedtuple
+ >>> FauxHistory = namedtuple( 'FauxHistory', [ 'price', 'availability_zone' ] )
+ >>> ZoneTuple = namedtuple( 'ZoneTuple', [ 'name' ] )
+
+ >>> zones = [ ZoneTuple( 'us-west-2a' ), ZoneTuple( 'us-west-2b' ) ]
+ >>> spot_history = [ FauxHistory( 0.1, 'us-west-2a' ), \
+ FauxHistory( 0.2,'us-west-2a'), \
+ FauxHistory( 0.3,'us-west-2b'), \
+ FauxHistory( 0.6,'us-west-2b')]
+ >>> # noinspection PyProtectedMember
+ >>> Box._choose_spot_zone( zones, 0.15, spot_history )
+ 'us-west-2a'
+
+ >>> spot_history=[ FauxHistory( 0.3, 'us-west-2a' ), \
+ FauxHistory( 0.2, 'us-west-2a' ), \
+ FauxHistory( 0.1, 'us-west-2b'), \
+ FauxHistory( 0.6, 'us-west-2b') ]
+ >>> # noinspection PyProtectedMember
+ >>> Box._choose_spot_zone(zones, 0.15, spot_history)
+ 'us-west-2b'
+
+ >>> spot_history={ FauxHistory( 0.1, 'us-west-2a' ), \
+ FauxHistory( 0.7, 'us-west-2a' ), \
+ FauxHistory( 0.1, "us-west-2b" ), \
+ FauxHistory( 0.6, 'us-west-2b' ) }
+ >>> # noinspection PyProtectedMember
+ >>> Box._choose_spot_zone(zones, 0.15, spot_history)
+ 'us-west-2b'
+ """
+
+ # Create two lists of ZoneTuples of the form (zone.name, price_deviation), one for zones
+ # whose most recent price is under the bid and one for zones at or over the bid. From the
+ # under-bid list (or, failing that, the over-bid list) the zone with the smallest price
+ # deviation is picked.
+ #
+ markets_under_bid, markets_over_bid = [ ], [ ]
+ for zone in zones:
+ zone_histories = filter( lambda zone_history:
+ zone_history.availability_zone == zone.name, spot_history )
+ price_deviation = std_dev( [ history.price for history in zone_histories ] )
+ recent_price = zone_histories[ 0 ]
+ zone_tuple = cls.ZoneTuple( name=zone.name, price_deviation=price_deviation )
+ (markets_over_bid, markets_under_bid)[ recent_price.price < bid ].append( zone_tuple )
+
+ return min( markets_under_bid or markets_over_bid,
+ key=attrgetter( 'price_deviation' ) ).name
+
+ def _optimize_spot_bid( self, instance_type, spot_bid ):
+ """
+ Checks whether the bid is sane and makes an effort to place the instance in a sensible zone.
+ """
+ spot_history = self._get_spot_history( instance_type )
+ self._check_spot_bid( spot_bid, spot_history )
+ zones = self.ctx.ec2.get_all_zones( )
+ most_stable_zone = self._choose_spot_zone( zones, spot_bid, spot_history )
+ log.info( "Placing spot instances in zone %s.", most_stable_zone )
+ return most_stable_zone
+
+ @staticmethod
+ def _check_spot_bid( spot_bid, spot_history ):
+ """
+ Prevents users from potentially over-paying for instances
+
+ Note: this checks over the whole region, not a particular zone
+
+ :param spot_bid: float
+
+ :type spot_history: list[SpotPriceHistory]
+
+ Logs a warning if the bid is more than double the instance type's average spot price.
+
+ >>> from collections import namedtuple
+ >>> FauxHistory = namedtuple( "FauxHistory", [ "price", "availability_zone" ] )
+ >>> spot_data = [ FauxHistory( 0.1, "us-west-2a" ), \
+ FauxHistory( 0.2, "us-west-2a" ), \
+ FauxHistory( 0.3, "us-west-2b" ), \
+ FauxHistory( 0.6, "us-west-2b" ) ]
+ >>> # noinspection PyProtectedMember
+ >>> Box._check_spot_bid( 0.1, spot_data )
+ >>> # A bid of more than double the average spot price only triggers a warning:
+ >>> # noinspection PyProtectedMember
+ >>> Box._check_spot_bid( 2, spot_data )
+ """
+ average = mean( [ datum.price for datum in spot_history ] )
+ if spot_bid > average * 2:
+ log.warn( "Your bid $ %f is more than double this instance type's average "
+ "spot price ($ %f) over the last week", spot_bid, average )
+
+ def _get_spot_history( self, instance_type ):
+ """
+ Returns a list of the 1,000 most recent spot market data points represented as SpotPriceHistory
+ objects. Note: The most recent object/data point will be first in the list.
+
+ :rtype: list[SpotPriceHistory]
+ """
+
+ one_week_ago = datetime.datetime.now( ) - datetime.timedelta( days=7 )
+ spot_data = self.ctx.ec2.get_spot_price_history( start_time=one_week_ago.isoformat( ),
+ instance_type=instance_type,
+ product_description="Linux/UNIX" )
+ spot_data.sort( key=attrgetter( "timestamp" ), reverse=True )
+ return spot_data
+
+ def create( self, spec,
+ num_instances=1,
+ wait_ready=True,
+ terminate_on_error=True,
+ spot_timeout=None,
+ spot_tentative=False,
+ cluster_ordinal=0,
+ executor=None ):
+ """
+ Create the EC2 instance represented by this box, and optionally, any number of clones of
+ that instance. Optionally wait for the instances to be ready.
+
+ If this box was prepared to launch clones, and multiple instances were indeed launched by
+ EC2, clones of this Box instance will be created, one clone for each additional instance.
+ This Box instance will represent the first EC2 instance while the clones will represent
+ the additional EC2 instances. The given executor will be used to handle post-creation
+ activity on each instance.
+
+ :param spec: a dictionary with keyword arguments to request_spot_instances,
+ if the 'price' key is present, or run_instances otherwise.
+
+ :param bool wait_ready: whether to wait for all instances to be ready. The waiting for an
+ instance will be handled as a task that is submitted to the given executor.
+
+ :param bool terminate_on_error: If True, terminate instance on errors. If False,
+ never terminate any instances. Unfulfilled spot requests will always be cancelled.
+
+ :param cluster_ordinal: the cluster ordinal to be assigned to the first instance or an
+ iterable yielding ordinals for the instances
+
+ :param executor: a callable that accepts two arguments: a task function and a sequence of
+ task arguments. The executor applies the task function to the given sequence of
+ arguments. It may choose to do so immediately, i.e. synchronously or at a later time,
+ i.e. asynchronously. If None, a synchronous executor will be used by default.
+
+ :rtype: list[Box]
+ """
+ if isinstance( cluster_ordinal, int ):
+ cluster_ordinal = count( start=cluster_ordinal )
+
+ if executor is None:
+ def executor( f, args ):
+ f( *args )
+
+ adopters = iter( concat( self, self.clones( ) ) )
+ boxes = [ ]
+ pending_ids = set( )
+ pending_ids_lock = threading.RLock( )
+
+ def adopt( adoptees ):
+ """
+ :type adoptees: Iterator[Instance]
+ """
+ pending_ids.update( i.id for i in adoptees )
+ for box, instance in izip( adopters, adoptees ):
+ box.adopt( instance, next( cluster_ordinal ) )
+ if not wait_ready:
+ # Without wait_ready, an instance is done as soon as it has been adopted.
+ pending_ids.remove( instance.id )
+ boxes.append( box )
+
+ try:
+ if 'price' in spec:
+ price = spec.price
+ del spec.price
+ tags = dict(cluster_name=self.cluster_name) if self.cluster_name else None
+ # Spot requests are fulfilled in batches. A batch could consist of one instance,
+ # all requested instances or a subset thereof. As soon as a batch comes back from
+ # _create_spot_instances(), we will want to adopt every instance in it. Part of
+ # adoption is tagging which is crucial for the boot code running on cluster nodes.
+ for batch in create_spot_instances( self.ctx.ec2, price, self.image_id, spec,
+ num_instances=num_instances,
+ timeout=spot_timeout,
+ tentative=spot_tentative,
+ tags=tags):
+ adopt( batch )
+ else:
+ adopt( create_ondemand_instances( self.ctx.ec2, self.image_id, spec,
+ num_instances=num_instances ) )
+ if spot_tentative:
+ if not boxes: return boxes
+ else:
+ assert boxes
+ assert boxes[ 0 ] is self
+
+ if wait_ready:
+ def wait_ready_callback( box ):
+ try:
+ # noinspection PyProtectedMember
+ box._wait_ready( { 'pending' }, first_boot=True )
+ except:
+ with panic( log ):
+ if terminate_on_error:
+ log.warn( 'Terminating instance ...' )
+ self.ctx.ec2.terminate_instances( [ box.instance_id ] )
+ finally:
+ with pending_ids_lock:
+ pending_ids.remove( box.instance_id )
+
+ self._batch_wait_ready( boxes, executor, wait_ready_callback )
+ except:
+ if terminate_on_error:
+ with panic( log ):
+ with pending_ids_lock:
+ unfinished_ids_list = list( pending_ids )
+ if unfinished_ids_list:
+ log.warn( 'Terminating instances ...' )
+ self.ctx.ec2.terminate_instances( unfinished_ids_list )
+ raise
+ else:
+ return boxes
+
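The executor passed to create() is any callable that applies a task function to a tuple of arguments; the default defined above simply runs the task synchronously. A minimal thread-pool variant, sketched with the futures backport that this package already depends on (the pool size and usage lines are illustrative):

    from concurrent.futures import ThreadPoolExecutor

    pool = ThreadPoolExecutor( max_workers=8 )

    def async_executor( f, args ):
        # run post-creation work, e.g. waiting for instance readiness, in the background
        pool.submit( f, *args )

    # boxes = box.create( spec, num_instances=4, executor=async_executor )
    # pool.shutdown( wait=True )  # block until all submitted tasks have finished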
+ def _batch_wait_ready( self, boxes, executor, callback ):
+ if len( boxes ) == 1:
+ # For a single instance, self._wait_ready will wait for the instance to change to
+ # running ...
+ executor( callback, (self,) )
+ else:
+ # .. but for multiple instances it is more efficient to wait for all of the
+ # instances together.
+ boxes_by_id = { box.instance_id: box for box in boxes }
+ # Wait for instances to enter the running state and as they do, pass them to
+ # the executor where they are waited on concurrently.
+ num_running, num_other = 0, 0
+ # TODO: timeout
+ instances = (box.instance for box in boxes)
+ for instance in wait_instances_running( self.ctx.ec2, instances ):
+ box = boxes_by_id[ instance.id ]
+ # equivalent to the instance.update() done in _wait_ready()
+ box.instance = instance
+ if instance.state == 'running':
+ executor( callback, (box,) )
+ num_running += 1
+ else:
+ log.info( 'Instance %s in unexpected state %s.',
+ instance.id, instance.state )
+ num_other += 1
+ assert num_running + num_other == len( boxes )
+ if not num_running:
+ raise RuntimeError( 'None of the instances entered the running state.' )
+ if num_other:
+ log.warn( '%i instance(s) entered a state other than running.', num_other )
+
+ def clones( self ):
+ """
+ Generates an infinite sequence of clones of this box.
+
+ :rtype: Iterator[Box]
+ """
+ while True:
+ clone = copy( self )
+ clone.unbind( )
+ yield clone
+
+ def adopt( self, instance, cluster_ordinal ):
+ """
+ Link the given newly created EC2 instance with this box.
+ """
+ log.info( '... created %s.', instance.id )
+ self.instance = instance
+ self.cluster_ordinal = cluster_ordinal
+ if self.cluster_name is None:
+ self.cluster_name = self.instance_id
+ self._on_instance_created( )
+
+ def _set_instance_options( self, options ):
+ """
+ Initialize optional instance attributes from the given dictionary mapping option names to
+ option values. The keys in the dictionary must be strings, the values can be any type.
+ This method handles the conversion of values from string transparently. If a key is
+ missing this method will provide a default.
+ """
+ # Relies on idempotence of int
+ self.generation = int( options.get( 'generation' ) or 0 )
+ self.cluster_ordinal = int( options.get( 'cluster_ordinal' ) or 0 )
+ self.cluster_name = options.get( 'cluster_name' )
+ for option in self.get_role_options( ):
+ value = options.get( option.name )
+ if value is not None:
+ self.role_options[ option.name ] = option.type( value )
+
+ def _get_instance_options( self ):
+ """
+ Return a dictionary specifying the tags an instance of this role should be tagged with.
+ Keys and values should be strings.
+ """
+ options = dict( Name=self.ctx.to_aws_name( self.role( ) ),
+ generation=str( self.generation ),
+ cluster_ordinal=str( self.cluster_ordinal ),
+ cluster_name=self.cluster_name )
+ for option in self.get_role_options( ):
+ value = self.role_options.get( option.name )
+ if value is not None:
+ options[ option.name ] = option.repr( value )
+ return options
+
+ def _get_image_options( self ):
+ """
+ Return a dictionary specifying the tags an image of an instance of this role should be
+ tagged with. Keys and values should be strings.
+ """
+ options = dict( generation=str( self.generation + 1 ) )
+ for option in self.get_role_options( ):
+ if option.inherited:
+ value = self.role_options.get( option.name )
+ if value is not None:
+ options[ option.name ] = option.repr( value )
+ return options
+
+ # noinspection PyClassHasNoInit
+ class RoleOption( namedtuple( "_RoleOption", 'name type repr help inherited' ) ):
+ """
+ Describes a role option, i.e. an instance option that is specific to boxes of a
+ particular role. Name is the name of the option, type is a function converting an option
+ value from a string to the option's native type, repr is the inverse of type, help is a
+ help text describing the option and inherited is a boolean controlling whether the option
+ is inherited by images created from an instance.
+ """
+
+ def to_dict( self ):
+ return self._asdict( )
+
+ def type( self, value ):
+ try:
+ # noinspection PyUnresolvedReferences
+ return super( Box.RoleOption, self ).type( value )
+ except ValueError:
+ raise UserError(
+ "'%s' is not a valid value for option %s" % (value, self.name) )
+
+ @classmethod
+ def get_role_options( cls ):
+ """
+ Return a list of RoleOption objects, one for each option supported by this role.
+
+ :rtype: list[Box.RoleOption]
+ """
+ return [ ]
+
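A role advertises its options by overriding get_role_options() and appending RoleOption instances; incoming values are converted via the option's type and end up in self.role_options, from where they are round-tripped through instance and image tags. A minimal sketch for a hypothetical integer option (the class and option names are illustrative, abstract methods omitted):

    class CacheExampleBox( Box ):  # hypothetical, for illustration only
        @classmethod
        def get_role_options( cls ):
            return super( CacheExampleBox, cls ).get_role_options( ) + [
                cls.RoleOption( name='cache_size_gb',
                                type=int,
                                repr=str,
                                inherited=True,
                                help='Size of the local cache in gigabytes.' ) ]

        def _cache_size( self ):
            return self.role_options.get( 'cache_size_gb', 10 )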
+ def _on_instance_created( self ):
+ """
+ Invoked right after an instance was created.
+ """
+ log.info( 'Tagging instance ... ' )
+ tags_dict = self._get_instance_options( )
+ tag_object_persistently( self.instance, tags_dict )
+ log.info( '... instance tagged %r.', tags_dict )
+
+ def _on_instance_running( self, first_boot ):
+ """
+ Invoked while creating, binding or starting an instance, right after the instance entered
+ the running state.
+
+ :param first_boot: True if this is the first time the instance enters the running state
+ since its creation
+ """
+ pass
+
+ def _on_instance_ready( self, first_boot ):
+ """
+ Invoked while creating, binding or starting an instance, right after the instance was
+ found to be ready.
+
+ :param first_boot: True if the instance was booted for the first time, i.e. if this is
+ the first time the instance becomes ready since its creation, False if the instance was
+ booted but not for the first time, None if it is not clear whether the instance was
+ booted, e.g. after binding.
+ """
+ if first_boot and not self._manages_keys_internally( ):
+ self.__inject_authorized_keys( self.ec2_keypairs[ 1: ] )
+
+ def bind( self,
+ instance=None,
+ instance_id=None,
+ ordinal=None, cluster_name=None,
+ wait_ready=True, verbose=True ):
+ """
+ Verify that the EC2 instance represented by this box exists and, optionally,
+ wait until it is ready, i.e. that it is running, has a public host name and can be
+ connected to via SSH. If the box doesn't exist, an exception will be raised.
+
+ :param wait_ready: if True, wait for the instance to be ready
+ """
+ if wait_ready: verbose = True
+ if self.instance is None:
+ if verbose: log.info( 'Binding to instance ... ' )
+ if instance is not None:
+ assert ordinal is None and cluster_name is None and instance_id is None
+ name = instance.tags[ 'Name' ]
+ assert self.ctx.contains_aws_name( name )
+ assert self.ctx.base_name( self.ctx.from_aws_name( name ) ) == self.role( )
+ elif instance_id is not None:
+ assert ordinal is None
+ try:
+ instance = self.ctx.ec2.get_only_instances( instance_id )[ 0 ]
+ except EC2ResponseError as e:
+ if e.error_code.startswith( 'InvalidInstanceID' ):
+ raise UserError( "No instance with ID '%s'." % instance_id )
+ try:
+ name = instance.tags[ 'Name' ]
+ except KeyError:
+ raise UserError( "Instance %s does not have a Name tag." % instance.id )
+ if not self.ctx.try_contains_aws_name( name ):
+ raise UserError( "Instance %s with Name tag '%s' is not in namespace %s."
+ % (instance.id, name, self.ctx.namespace) )
+ if self.ctx.base_name( self.ctx.from_aws_name( name ) ) != self.role( ):
+ raise UserError( "Instance %s with Name tag '%s' is not a %s." %
+ (instance.id, name, self.role( )) )
+ if cluster_name is not None:
+ actual_cluster_name = instance.tags.get( 'cluster_name' )
+ if actual_cluster_name is not None and actual_cluster_name != cluster_name:
+ raise UserError( "Instance %s has cluster name '%s', not '%s'." %
+ (instance.id, actual_cluster_name, cluster_name) )
+ else:
+ instance = self.__get_instance_by_ordinal( ordinal=ordinal,
+ cluster_name=cluster_name )
+ self.instance = instance
+ self.image_id = self.instance.image_id
+ options = dict( self.instance.tags )
+ self._set_instance_options( options )
+ if wait_ready:
+ self._wait_ready( from_states={ 'pending' }, first_boot=None )
+ else:
+ if verbose: log.info( '... bound to %s.', self.instance.id )
+ return self
+
+ def unbind( self ):
+ """
+ Unset all state in this box that would be specific to an individual EC2 instance. This
+ method prepares this box for being bound to another EC2 instance.
+ """
+ self.instance = None
+ self.cluster_ordinal = None
+
+ def list( self, wait_ready=False, **tags ):
+ return [ box.bind( instance=instance, wait_ready=wait_ready, verbose=False )
+ for box, instance in izip( concat( self, self.clones( ) ),
+ self.__list_instances( **tags ) ) ]
+
+ def __list_instances( self, **tags ):
+ """
+ Look up and return a list of instances performing this box's role.
+
+ :rtype: list[Instance]
+ """
+ name = self.ctx.to_aws_name( self.role( ) )
+ filters = { 'tag:Name': name }
+ for k, v in tags.iteritems( ):
+ if v is not None:
+ filters[ 'tag:' + k ] = v
+ reservations = self.ctx.ec2.get_all_instances( filters=filters )
+ instances = [ i for r in reservations for i in r.instances if i.state != 'terminated' ]
+ instances.sort( key=self.__ordinal_sort_key )
+ return instances
+
+ def __ordinal_sort_key( self, instance ):
+ return instance.launch_time, instance.private_ip_address, instance.id
+
+ def __get_instance_by_ordinal( self, ordinal=None, cluster_name=None ):
+ """
+ Get the n-th instance that performs this box's role
+
+ :param ordinal: the index of the instance based on the ordering by launch_time
+
+ :rtype: boto.ec2.instance.Instance
+ """
+ instances = self.__list_instances( cluster_name=cluster_name )
+ if not instances:
+ raise UserError(
+ "No instance performing role %s in namespace %s" % (
+ self.role( ), self.ctx.namespace) if cluster_name is None
+ else "No instance performing role %s in cluster %s and namespace %s" % (
+ self.role( ), cluster_name, self.ctx.namespace) )
+ if ordinal is None:
+ if len( instances ) > 1:
+ raise UserError( "More than one instance performing role '%s'. Please specify an "
+ "ordinal, a cluster name or both to disambiguate." % self.role( ) )
+ ordinal = 0
+ try:
+ return instances[ ordinal ]
+ except IndexError:
+ raise UserError(
+ "No instance performing role %s in namespace %s has ordinal %i" % (
+ self.role( ), self.ctx.namespace, ordinal) if cluster_name is None
+ else "No instance performing role %s in cluster %s and namespace %s has ordinal %i" % (
+ self.role( ), cluster_name, self.ctx.namespace, ordinal) )
+
+ def _image_block_device_mapping( self ):
+ """
+ Returns the block device mapping to be used for the image. The base implementation
+ returns None, indicating that all volumes attached to the instance should be included in
+ the image.
+ """
+ return None
+
+ def image( self ):
+ """
+ Create an image (AMI) of the EC2 instance represented by this box and return its ID.
+ The EC2 instance needs to use an EBS-backed root volume. The box must be stopped or
+ an exception will be raised.
+ """
+ # We've observed instance state to flap from stopped back to stopping. As a best effort
+ # we wait for it to flap back to stopped.
+ wait_transition( self.instance, { 'stopping' }, 'stopped' )
+
+ log.info( "Creating image ..." )
+ timestamp = time.strftime( '%Y-%m-%d_%H-%M-%S' )
+ image_name = self.ctx.to_aws_name( self._image_name_prefix( ) + "_" + timestamp )
+ image_id = self.ctx.ec2.create_image(
+ instance_id=self.instance_id,
+ name=image_name,
+ block_device_mapping=self._image_block_device_mapping( ) )
+ while True:
+ try:
+ image = self.ctx.ec2.get_image( image_id )
+ tag_object_persistently( image, self._get_image_options( ) )
+ wait_transition( image, { 'pending' }, 'available' )
+ log.info( "... created %s (%s).", image.id, image.name )
+ break
+ except self.ctx.ec2.ResponseError as e:
+ # FIXME: I don't think get_image can throw this, it should be outside the try
+ if e.error_code != 'InvalidAMIID.NotFound':
+ raise
+ # There seems to be another race condition in EC2 that causes a freshly created image to
+ # not be included in queries other than by AMI ID.
+ log.info( 'Checking if image %s is discoverable ...' % image_id )
+ while True:
+ if image_id in (_.id for _ in self.list_images( )):
+ log.info( '... image now discoverable.' )
+ break
+ log.info( '... image %s not yet discoverable, trying again in %is ...', image_id,
+ a_short_time )
+ time.sleep( a_short_time )
+ return image_id
+
+ def stop( self ):
+ """
+ Stop the EC2 instance represented by this box. Stopped instances can be started later using
+ :py:func:`Box.start`.
+ """
+ self.__assert_state( 'running' )
+ log.info( 'Stopping instance ...' )
+ self.ctx.ec2.stop_instances( [ self.instance_id ] )
+ wait_transition( self.instance,
+ from_states={ 'running', 'stopping' },
+ to_state='stopped' )
+ log.info( '... instance stopped.' )
+
+ def start( self ):
+ """
+ Start the EC2 instance represented by this box
+ """
+ self.__assert_state( 'stopped' )
+ log.info( 'Starting instance, ... ' )
+ self.ctx.ec2.start_instances( [ self.instance_id ] )
+ # Not 100% sure why from_states includes 'stopped' but I think I noticed that there is a
+ # short interval after start_instances returns during which the instance is still in
+ # stopped before it goes into pending
+ self._wait_ready( from_states={ 'stopped', 'pending' }, first_boot=False )
+
+ def reboot( self ):
+ """
+ Reboot the EC2 instance represented by this box. When this method returns,
+ the EC2 instance represented by this object will likely have different public IP and
+ hostname.
+ """
+ # There is reboot_instances in the API but reliably detecting the
+ # state transitions is hard. So we stop and start instead.
+ self.stop( )
+ self.start( )
+
+ def terminate( self, wait=True ):
+ """
+ Terminate the EC2 instance represented by this box.
+ """
+ if self.instance_id is not None:
+ instance = self.instance
+ if instance.state != 'terminated':
+ log.info( 'Terminating instance ...' )
+ self.ctx.ec2.terminate_instances( [ self.instance_id ] )
+ if wait:
+ wait_transition( instance,
+ from_states={ 'running', 'shutting-down', 'stopped' },
+ to_state='terminated' )
+ log.info( '... instance terminated.' )
+
+ def _attach_volume( self, volume_helper, device ):
+ volume_helper.attach( self.instance_id, device )
+
+ def _execute_task( self, task, user ):
+ """
+ Execute the given Fabric task on the EC2 instance represented by this box
+ """
+ if not callable( task ): task = task( self )
+ # using IP instead of host name yields more compact log lines
+ # host = "%s@%s" % ( user, self.ip_address )
+ with settings( user=user ):
+ host = self.ip_address
+ return execute( task, hosts=[ host ] )[ host ]
+
+ def __assert_state( self, expected_state ):
+ """
+ Raises a UserError if the instance represented by this object is not in the given state.
+
+ :param expected_state: the expected state
+ """
+ actual_state = self.instance.state
+ if actual_state != expected_state:
+ raise UserError( "Expected instance state '%s' but got '%s'"
+ % (expected_state, actual_state) )
+
+ def _wait_ready( self, from_states, first_boot ):
+ """
+ Wait until the given instance transitions from stopped or pending state to being fully
+ running and accessible via SSH.
+
+ :param from_states: the set of states the instance may be in when this method is
+ invoked, any other state will raise an exception.
+ :type from_states: set of str
+
+ :param first_boot: True if the instance is currently booting for the first time,
+ None if the instance isn't booting, False if the instance is booting but not for the
+ first time.
+ """
+ log.info( "... waiting for instance %s ... ", self.instance.id )
+ wait_transition( self.instance, from_states, 'running' )
+ self._on_instance_running( first_boot )
+ log.info( "... running, waiting for assignment of public IP ... " )
+ self.__wait_public_ip_assigned( self.instance )
+ log.info( "... assigned, waiting for SSH port ... " )
+ self.__wait_ssh_port_open( )
+ log.info( "... open ... " )
+ if first_boot is not None:
+ log.info( "... testing SSH ... " )
+ self.__wait_ssh_working( )
+ log.info( "... SSH working ..., " )
+ log.info( "... instance ready." )
+ self._on_instance_ready( first_boot )
+
+ def __wait_public_ip_assigned( self, instance ):
+ """
+ Wait until the instance has a public IP address assigned to it.
+
+ :type instance: boto.ec2.instance.Instance
+ """
+ while not instance.ip_address or not instance.public_dns_name:
+ time.sleep( a_short_time )
+ instance.update( )
+
+ def __wait_ssh_port_open( self ):
+ """
+ Wait until the instance represented by this box is accessible via SSH.
+
+ :return: the number of unsuccessful attempts to connect to the port before the first
+ success
+ """
+ for i in count( ):
+ s = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
+ try:
+ s.settimeout( a_short_time )
+ s.connect( (self.ip_address, 22) )
+ return i
+ except socket.error:
+ pass
+ finally:
+ s.close( )
+
+ class IgnorePolicy( MissingHostKeyPolicy ):
+ def missing_host_key( self, client, hostname, key ):
+ pass
+
+ def __wait_ssh_working( self ):
+ while True:
+ client = None
+ try:
+ client = self._ssh_client( )
+ stdin, stdout, stderr = client.exec_command( 'echo hi' )
+ try:
+ line = stdout.readline( )
+ if line == 'hi\n':
+ return
+ else:
+ raise AssertionError( "Read unexpected line '%s'" % line )
+ finally:
+ stdin.close( )
+ stdout.close( )
+ stderr.close( )
+ except AssertionError:
+ raise
+ except KeyboardInterrupt:
+ raise
+ except Exception as e:
+ logging.info( e )
+ finally:
+ if client is not None:
+ client.close( )
+ time.sleep( a_short_time )
+
+ def _ssh_client( self ):
+ client = SSHClient( )
+ client.set_missing_host_key_policy( self.IgnorePolicy( ) )
+ client.connect( hostname=self.ip_address,
+ username=self.admin_account( ),
+ timeout=a_short_time )
+ return client
+
+ def ssh( self, user=None, command=None ):
+ if command is None: command = [ ]
+ status = subprocess32.call( self._ssh_args( user, command ) )
+ # According to ssh(1), SSH returns the status code of the remote process or 255 if
+ # something else went wrong. Python exits with status 1 if an uncaught exception is
+ # thrown. Since this is also the default status code that most other programs return on
+ # failure, there is no easy way to distinguish between failures in programs run remotely
+ # by cgcloud ssh and something being wrong in cgcloud.
+ if status == 255:
+ raise RuntimeError( 'ssh failed' )
+ return status
+
+ def rsync( self, args, user=None, ssh_opts=None ):
+ ssh_args = self._ssh_args( user, [ ] )
+ if ssh_opts:
+ ssh_args.append( ssh_opts )
+ subprocess32.check_call( [ 'rsync', '-e', ' '.join( ssh_args ) ] + args )
+
+ def _ssh_args( self, user, command ):
+ if user is None: user = self.default_account( )
+ # Using host name instead of IP allows for more descriptive known_hosts entries and
+ # enables using wildcard Host entries like *.compute.amazonaws.com in ~/.ssh/config.
+ return [ 'ssh', '%s@%s' % (user, self.host_name), '-A' ] + command
+
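Both helpers build the same 'ssh user@host -A' argument vector: ssh() runs a remote command (or opens an interactive shell when the command is empty) and rsync() reuses the vector as the remote shell, with a leading colon marking the remote side of a path. A minimal sketch, assuming box is bound to a running instance (the file names are illustrative):

    box.ssh( command=[ 'uname', '-a' ] )                      # returns the remote exit status
    box.rsync( args=[ '-av', 'payload.tar.gz', ':/tmp/' ] )   # leading colon marks the remote side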
+ @fabric_task
+ def __inject_authorized_keys( self, ec2_keypairs ):
+ with closing( StringIO( ) ) as authorized_keys:
+ get( local_path=authorized_keys, remote_path='~/.ssh/authorized_keys' )
+ authorized_keys.seek( 0 )
+ ssh_pubkeys = set( l.strip( ) for l in authorized_keys.readlines( ) )
+ for ec2_keypair in ec2_keypairs:
+ ssh_pubkey = self.__download_ssh_pubkey( ec2_keypair )
+ if ssh_pubkey: ssh_pubkeys.add( ssh_pubkey )
+ authorized_keys.seek( 0 )
+ authorized_keys.truncate( )
+ authorized_keys.write( '\n'.join( ssh_pubkeys ) )
+ authorized_keys.write( '\n' )
+ put( local_path=authorized_keys, remote_path='~/.ssh/authorized_keys' )
+
+ def __download_ssh_pubkey( self, keypair ):
+ try:
+ return self.ctx.download_ssh_pubkey( keypair ).strip( )
+ except UserError as e:
+ log.warn( 'Exception while downloading SSH public key from S3', e )
+ return None
+
+ @fabric_task
+ def _propagate_authorized_keys( self, user, group=None ):
+ """
+ Ensure that the given user account accepts SSH connections for the same keys as the
+ current user. The current user must have sudo.
+
+ :param user:
+ the name of the user to propagate the current user's authorized keys to
+
+ :param group:
+ the name of the group that should own the files and directories that are created by
+ this method, defaults to the default group of the given user
+ """
+
+ if group is None:
+ group = run( "getent group $(getent passwd %s | cut -d : -f 4) "
+ "| cut -d : -f 1" % user )
+ args = dict( src_user=self.admin_account( ),
+ dst_user=user,
+ dst_group=group )
+ sudo( 'install -d ~{dst_user}/.ssh '
+ '-m 755 -o {dst_user} -g {dst_group}'.format( **args ) )
+ sudo( 'install -t ~{dst_user}/.ssh ~{src_user}/.ssh/authorized_keys '
+ '-m 644 -o {dst_user} -g {dst_group}'.format( **args ) )
+
+ @classmethod
+ def recommended_instance_type( cls ):
+ return 't2.micro' if 'hvm' in cls.supported_virtualization_types( ) else 't1.micro'
+
+ @classmethod
+ def supported_virtualization_types( cls ):
+ """
+ Returns the virtualization types supported by this box in order of preference, preferred
+ types first.
+ """
+ return [ 'hvm', 'paravirtual' ]
+
+ def list_images( self ):
+ """
+ :rtype: list of boto.ec2.image.Image
+ """
+ image_name_pattern = self.ctx.to_aws_name( self._image_name_prefix( ) + '_' ) + '*'
+ images = self.ctx.ec2.get_all_images( filters={ 'name': image_name_pattern } )
+ images.sort( key=attrgetter( 'name' ) ) # that sorts by date, effectively
+ return images
+
+ @abstractmethod
+ def _register_init_command( self, cmd ):
+ """
+ Register a shell command to be executed towards the end of system initialization. The
+ command should work when set -e is in effect.
+ """
+ raise NotImplementedError( )
+
+ def get_instance_profile_arn( self ):
+ """
+ Prepares the instance profile to be used for this box and returns its ARN
+ """
+ iam_role_name, policies = self._get_iam_ec2_role( )
+ aws_role_name = self.ctx.setup_iam_ec2_role( self._hash_iam_role_name( iam_role_name ),
+ policies )
+ log.info( 'Set up instance profile using hashed IAM role name %s, derived from %s.',
+ aws_role_name, iam_role_name )
+ aws_instance_profile_name = self.ctx.to_aws_name( self.role( ) )
+ try:
+ profile = self.ctx.iam.get_instance_profile( aws_instance_profile_name )
+ except BotoServerError as e:
+ if e.status == 404:
+ profile = self.ctx.iam.create_instance_profile( aws_instance_profile_name )
+ profile = profile.create_instance_profile_response.create_instance_profile_result
+ else:
+ raise
+ else:
+ profile = profile.get_instance_profile_response.get_instance_profile_result
+
+ profile = profile.instance_profile
+ profile_arn = profile.arn
+ # Note that Boto does not correctly parse the result from get/create_instance_profile.
+ # The 'roles' field should be an instance of ListElement, whereas it currently is a
+ # simple, dict-like Element. We can check a dict-like element for size but since all
+ # children have the same name -- 'member' in this case -- the dictionary will always have
+ # just one entry. Luckily, IAM currently only supports one role per profile so this Boto
+ # bug does not affect us much.
+ if len( profile.roles ) > 1:
+ raise RuntimeError( 'Did not expect profile to contain more than one role' )
+ elif len( profile.roles ) == 1:
+ # this should be profile.roles[0].role_name
+ if profile.roles.member.role_name == aws_role_name:
+ return profile_arn
+ else:
+ self.ctx.iam.remove_role_from_instance_profile( aws_instance_profile_name,
+ profile.roles.member.role_name )
+ self.ctx.iam.add_role_to_instance_profile( aws_instance_profile_name, aws_role_name )
+ return profile_arn
+
+ def _hash_iam_role_name( self, iam_role_name ):
+        # An IAM role name is limited to 64 characters, so we hash it, encoding the digest with
+        # aws_d32, to get a short but still unique identifier. Note that Box subclasses should
+        # append their CGCloud role name to the IAM role name. Prepending the prefix here and in
+        # _get_iam_ec2_role keeps us backwards-compatible with PassRole statements generated by
+        # older versions of CGCloud.
+ return '-'.join( [ self.iam_role_name_prefix,
+ aws_d32.encode( hashlib.sha1( iam_role_name ).digest( )[ 0:8 ] ) ] )
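+        # Roughly: take the SHA-1 digest of the full role name, keep its first 8 bytes, encode
+        # them with the aws_d32 alphabet and prepend the fixed 'cgcloud' prefix. The result is a
+        # short, deterministic name comfortably below IAM's 64-character limit.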
+
+ iam_role_name_prefix = 'cgcloud'
+
+ def _role_arn( self, iam_role_name_prefix='' ):
+ """
+ Returns the ARN for roles with the given prefix in the current AWS account
+ """
+ aws_role_prefix = self.ctx.to_aws_name( iam_role_name_prefix + self.iam_role_name_prefix )
+ return 'arn:aws:iam::%s:role/%s*' % (self.ctx.account, aws_role_prefix)
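+        # For example, with a fictitious account ID and the default prefix this yields something
+        # like 'arn:aws:iam::123456789012:role/cgcloud*' (the exact prefix also depends on what
+        # to_aws_name adds for the namespace), matching every role created under that prefix.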
+
+ def _get_iam_ec2_role( self ):
+ """
+ Returns the IAM role to be associated with this box.
+
+ :rtype: (str, dict)
+ :return A tuple of the form ( iam_role_name, policy_document ) where policy_document is
+ an IAM policy in the form of a dictionary that can be turned into JSON. When
+ overriding this method, subclasses should append to the tuple elements rather
+ than modify them in place.
+ """
+ return self.iam_role_name_prefix, { }
+
+ # http://aws.amazon.com/amazon-linux-ami/instance-type-matrix/
+ #
+ virtualization_types = [ 'paravirtual', 'hvm' ]
+ paravirtual_families = [ 'm1', 'c1', 'm2', 't1' ]
+
+ def __default_virtualization_type( self, instance_type ):
+ family = instance_type.split( '.', 2 )[ 0 ].lower( )
+ return 'paravirtual' if family in self.paravirtual_families else 'hvm'
+
+ def delete_image( self, image_ref, wait=True, delete_snapshot=True ):
+ image = self.__select_image( image_ref )
+ image_id = image.id
+ log.info( "Deregistering image %s", image_id )
+ image.deregister( )
+ if wait:
+ log.info( "Waiting for deregistration to finalize ..." )
+ while True:
+ if self.ctx.ec2.get_image( image_id ):
+ log.info( '... image still registered, trying again in %is ...' %
+ a_short_time )
+ time.sleep( a_short_time )
+ else:
+ log.info( "... image deregistered." )
+ break
+ if delete_snapshot:
+ self.__delete_image_snapshot( image, wait=wait )
+
+ def __delete_image_snapshot( self, image, wait=True ):
+ for root_device in self.possible_root_devices:
+ root_bdt = image.block_device_mapping.get( root_device )
+ if root_bdt:
+ snapshot_id = image.block_device_mapping[ root_device ].snapshot_id
+ log.info( "Deleting snapshot %s.", snapshot_id )
+ # It is safe to retry this indefinitely because a snapshot can only be
+ # referenced by one AMI. See also https://github.com/boto/boto/issues/3019.
+ for attempt in retry_ec2(
+ retry_for=a_long_time if wait else 0,
+ retry_while=lambda e: e.error_code == 'InvalidSnapshot.InUse' ):
+ with attempt:
+ self.ctx.ec2.delete_snapshot( snapshot_id )
+ return
+ raise RuntimeError( 'Could not determine root device in AMI' )
+
+ def _provide_generated_keypair( self,
+ ec2_keypair_name,
+ private_key_path,
+ overwrite_local=True,
+ overwrite_ec2=False ):
+ """
+ Expects to be running in a Fabric task context!
+
+ Ensures that 1) a key pair has been generated in EC2 under the given name, 2) a matching
+ private key exists on this box at the given path and 3) the corresponding public key
+ exists at the given path with .pub appended. A generated keypair is one for which EC2
+ generated the private key. This is different from imported keypairs where the private key
+ is generated locally and the public key is then imported to EC2.
+
+ Since EC2 exposes only the fingerprint for a particular key pair, but not the public key,
+ the public key of the generated key pair is additionally stored in S3. The public key
+        object in S3 will be identified using the key pair's fingerprint, which really is the
+        private key's fingerprint. Note that this is different from imported key pairs, which are
+ identified by their public key's fingerprint, both by EC2 natively and by cgcloud in S3.
+
+ If there already is a key pair in EC2 and a private key at the given path in this box,
+ they are checked to match each other. If they don't, an exception will be raised.
+
+ If there already is a local private key but no key pair in EC2, either an exception will
+ be raised (if overwrite_local is False) or a key pair will be created and the local
+ private key will be overwritten (if overwrite_local is True).
+
+ If there is a key pair in EC2 but no local private key, either an exception will be
+ raised (if overwrite_ec2 is False) or the key pair will be deleted and a new one will be
+ created in its stead (if overwrite_ec2 is True).
+
+        To understand the logic behind all this, keep in mind that the private component of an
+        EC2-generated keypair can only be downloaded once, at creation time.
+
+ :param ec2_keypair_name: the name of the keypair in EC2
+ :param private_key_path: the path to the private key on this box
+ :param overwrite_local: whether to overwrite a local private key, see above
+ :param overwrite_ec2: whether to overwrite a keypair in EC2, see above
+ :return: the actual contents of the private and public keys as a tuple in that order
+ """
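+        # A compact recap of the cases handled below (for orientation only, the docstring above
+        # is authoritative):
+        #
+        #   EC2 keypair | local key | behaviour
+        #   ------------+-----------+------------------------------------------------------------
+        #   absent      | absent    | generate a new pair, store the private key locally
+        #   absent      | present   | overwrite the local key if overwrite_local, else UserError
+        #   present     | present   | verify that the fingerprints match, UserError otherwise
+        #   present     | absent    | delete and regenerate if overwrite_ec2, else UserError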
+
+ ec2_keypair = self.ctx.ec2.get_key_pair( ec2_keypair_name )
+ key_file_exists = run( 'test -f %s' % private_key_path, quiet=True ).succeeded
+
+ if ec2_keypair is None:
+ if key_file_exists:
+ if overwrite_local:
+ # TODO: make this more prominent, e.g. by displaying all warnings at the end
+ log.warn( 'Warning: Overwriting private key with new one from EC2.' )
+ else:
+ raise UserError( "Private key already exists on box. Creating a new key pair "
+ "in EC2 would require overwriting that file" )
+ ssh_privkey, ssh_pubkey = self.__generate_keypair( ec2_keypair_name, private_key_path )
+ else:
+ # With an existing keypair there is no way to get the private key from AWS,
+ # all we can do is check whether the locally stored private key is consistent.
+ if key_file_exists:
+ ssh_privkey, ssh_pubkey = self.__verify_generated_keypair( ec2_keypair,
+ private_key_path )
+ else:
+ if overwrite_ec2:
+ self.ctx.ec2.delete_key_pair( ec2_keypair_name )
+ ssh_privkey, ssh_pubkey = self.__generate_keypair( ec2_keypair_name,
+ private_key_path )
+ else:
+ raise UserError(
+ "The key pair {ec2_keypair.name} is registered in EC2 but the "
+ "corresponding private key file {private_key_path} does not exist on the "
+ "instance. In order to create the private key file, the key pair must be "
+ "created at the same time. Please delete the key pair from EC2 before "
+ "retrying.".format( **locals( ) ) )
+
+ # Store public key
+ put( local_path=StringIO( ssh_pubkey ), remote_path=private_key_path + '.pub' )
+
+ return ssh_privkey, ssh_pubkey
+
+ def __generate_keypair( self, ec2_keypair_name, private_key_path ):
+ """
+ Generate a keypair in EC2 using the given name and write the private key to the file at
+ the given path. Return the private and public key contents as a tuple.
+ """
+ ec2_keypair = self.ctx.ec2.create_key_pair( ec2_keypair_name )
+ if not ec2_keypair.material:
+ raise AssertionError( "Created key pair but didn't get back private key" )
+ ssh_privkey = ec2_keypair.material
+ put( local_path=StringIO( ssh_privkey ), remote_path=private_key_path )
+ assert ec2_keypair.fingerprint == ec2_keypair_fingerprint( ssh_privkey )
+ run( 'chmod go= %s' % private_key_path )
+ ssh_pubkey = private_to_public_key( ssh_privkey )
+ self.ctx.upload_ssh_pubkey( ssh_pubkey, ec2_keypair.fingerprint )
+ return ssh_privkey, ssh_pubkey
+
+ def __verify_generated_keypair( self, ec2_keypair, private_key_path ):
+ """
+ Verify that the given EC2 keypair matches the private key at the given path. Return the
+ private and public key contents as a tuple.
+ """
+ ssh_privkey = StringIO( )
+ get( remote_path=private_key_path, local_path=ssh_privkey )
+ ssh_privkey = ssh_privkey.getvalue( )
+ fingerprint = ec2_keypair_fingerprint( ssh_privkey )
+ if ec2_keypair.fingerprint != fingerprint:
+ raise UserError(
+ "The fingerprint {ec2_keypair.fingerprint} of key pair {ec2_keypair.name} doesn't "
+ "match the fingerprint {fingerprint} of the private key file currently present on "
+ "the instance. Please delete the key pair from EC2 before retrying. "
+ .format( **locals( ) ) )
+ ssh_pubkey = self.ctx.download_ssh_pubkey( ec2_keypair )
+ if ssh_pubkey != private_to_public_key( ssh_privkey ):
+ raise RuntimeError( "The private key on the data volume doesn't match the "
+ "public key in EC2." )
+ return ssh_privkey, ssh_pubkey
+
+ def _provide_imported_keypair( self, ec2_keypair_name, private_key_path, overwrite_ec2=False ):
+ """
+ Expects to be running in a Fabric task context!
+
+ Ensures that 1) a key pair has been imported to EC2 under the given name, 2) a matching
+ private key exists on this box at the given path and 3) the corresponding public key
+ exists at the given path with .pub appended.
+
+        If there is no private key at the given path on this box, one will be created. If there
+        already is an imported key pair in EC2, it is checked to match the local public key. If
+        they don't match, either an exception will be raised (if overwrite_ec2 is False) or the
+        EC2 key pair will be replaced with a new one by importing the local public key. The
+        public key itself will be tracked in S3. See _provide_generated_keypair for details.
+
+ :param ec2_keypair_name: the name of the keypair in EC2
+ :param private_key_path: the path to the private key on this box (tilde will be expanded)
+ :return: the actual contents of the private and public keys as a tuple in that order
+ """
+ key_file_exists = run( 'test -f %s' % private_key_path, quiet=True ).succeeded
+ if not key_file_exists:
+ run( "ssh-keygen -N '' -C '%s' -f '%s'" % (ec2_keypair_name, private_key_path) )
+ ssh_privkey = StringIO( )
+ get( remote_path=private_key_path, local_path=ssh_privkey )
+ ssh_privkey = ssh_privkey.getvalue( )
+ ssh_pubkey = StringIO( )
+ get( remote_path=private_key_path + '.pub', local_path=ssh_pubkey )
+ ssh_pubkey = ssh_pubkey.getvalue( )
+ self.ctx.register_ssh_pubkey( ec2_keypair_name, ssh_pubkey, force=overwrite_ec2 )
+ return ssh_privkey, ssh_pubkey
+
+ @contextmanager
+ def _project_artifacts( self, project_name ):
+ """
+        Like project.project_artifacts() but uploads any source distributions to the instance
+        represented by this box such that a pip running on that instance can install them.
+        Must be called directly or indirectly from a function decorated with fabric_task. Yields
+        a list of artifact references, each reference being either a remote path to a source
+        distribution or a versioned dependency reference, typically referring to a package on PyPI.
+ """
+ artifacts = [ ]
+ for artifact in project_artifacts( project_name ):
+ if artifact.startswith( '/' ):
+ artifact = put( local_path=artifact )[ 0 ]
+ artifacts.append( artifact )
+
+ yield artifacts
+
+ for artifact in artifacts:
+ if artifact.startswith( '/' ):
+ run( 'rm %s' % quote( artifact ) )
diff --git a/core/src/cgcloud/core/centos_box.py b/core/src/cgcloud/core/centos_box.py
new file mode 100644
index 0000000..54b052e
--- /dev/null
+++ b/core/src/cgcloud/core/centos_box.py
@@ -0,0 +1,133 @@
+from abc import abstractmethod
+import re
+from distutils.version import LooseVersion
+
+from fabric.operations import run, sudo
+
+from cgcloud.core.box import fabric_task
+from cgcloud.core.agent_box import AgentBox
+from cgcloud.core.yum_box import YumBox
+from cgcloud.core.rc_local_box import RcLocalBox
+
+admin_user = 'admin'
+
+
+class CentosBox( YumBox, AgentBox, RcLocalBox ):
+ """
+    A box representing EC2 instances that boot off a RightScale CentOS AMI. Most of the
+ complexity in this class stems from a workaround for RightScale's handling of the root
+ account. RightScale does not offer a non-root admin account, so after the instance boots for
+ the first time, we create an admin account and disable SSH and console logins to the root
+ account, just like on Canonical's Ubuntu AMIs. The instance is tagged with the name of the
+ admin account such that we can look it up later.
+ """
+
+ @abstractmethod
+ def release( self ):
+ """
+ :return: the version number of the CentOS release, e.g. "6.4"
+ """
+ raise NotImplementedError
+
+ def __init__( self, ctx ):
+ super( CentosBox, self ).__init__( ctx )
+ self._username = None
+
+ def admin_account( self ):
+ if self._username is None:
+ default_username = 'root' if self.generation == 0 else 'admin'
+ self._username = self.instance.tags.get( 'admin_user', default_username )
+ return self._username
+
+ def _set_username( self, admin_user ):
+ self._username = admin_user
+ self.instance.add_tag( 'admin_user', admin_user )
+
+ def _base_image( self, virtualization_type ):
+ release = self.release( )
+ images = self.ctx.ec2.get_all_images(
+ owners=[ '411009282317' ],
+ filters={
+ 'name': 'RightImage_CentOS_%s_x64*' % release,
+ 'root-device-type': 'ebs',
+ 'virtualization-type': virtualization_type } )
+ if not images:
+ raise self.NoSuchImageException(
+ "Can't find any candidate AMIs for CentOS release %s and virtualization type %s" % (
+ release, virtualization_type) )
+ max_version = None
+ base_image = None
+ for image in images:
+ match = re.match( 'RightImage_CentOS_(\d+(?:\.\d+)*)_x64_v(\d+(?:\.\d+)*)(_HVM)?_EBS',
+ image.name )
+ if match:
+ assert match.group( 1 ) == release
+ version = LooseVersion( match.group( 2 ) )
+ if max_version is None or max_version < version:
+ max_version = version
+ base_image = image
+ if not base_image:
+ raise self.NoSuchImageException(
+ "Can't find AMI matching CentOS release %s and virtualization type %s" % (
+ release, virtualization_type) )
+ return base_image
+
+ def _on_instance_ready( self, first_boot ):
+ super( CentosBox, self )._on_instance_ready( first_boot )
+ if first_boot and self.admin_account( ) == 'root':
+ self.__create_admin( )
+ self._set_username( admin_user )
+ self.__setup_admin( )
+
+ @fabric_task
+ def __create_admin( self ):
+ # Don't clear screen on logout, it's annoying
+ run( r"sed -i -r 's!^(/usr/bin/)?clear!# \0!' /etc/skel/.bash_logout ~/.bash_logout" )
+ # Imitate the security model of Canonical's Ubuntu AMIs: Create an admin user that can sudo
+ # without password and disable root logins via console and ssh.
+ run( 'useradd -m -s /bin/bash {0}'.format( admin_user ) )
+ self._propagate_authorized_keys( admin_user )
+ run( 'rm ~/.ssh/authorized_keys' )
+ run( 'echo "{0} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers'.format( admin_user ) )
+        # CentOS 6 has "Defaults: requiretty" in /etc/sudoers. This makes no sense for users with
+        # NOPASSWD. Requiretty causes sudo(...,pty=False) to fail with "You need a pty for
+        # sudo". We disable requiretty for the admin since we need pty=False for pip, which would
+        # otherwise spill its progress info all over the output.
+ run( 'echo "Defaults:{0} !requiretty" >> /etc/sudoers'.format( admin_user ) )
+ run( 'passwd -l root' )
+ run( 'echo PermitRootLogin no >> /etc/ssh/sshd_config' )
+
+ @fabric_task
+ def __setup_admin( self ):
+ run( "echo 'export PATH=\"/usr/local/sbin:/usr/sbin:/sbin:$PATH\"' >> ~/.bash_profile" )
+
+ if False:
+ # I recently discovered the undocumented AuthorizedKeysFile2 option which had been
+ # supported by OpenSSH for a long time. Considering that Ubuntu, too, lacks multi-file
+ # AuthorizedKeysFile in releases before Raring, we would have to update OpenSSH on those
+ # releases as well.
+
+ @fabric_task
+ def _update_openssh( self ):
+ """
+            Our cghub-cloud-agent needs a newer version of OpenSSH, one that supports listing
+            multiple files for the sshd_config option AuthorizedKeysFile. Stock CentOS 5 and 6
+            don't ship one, so we'll install a custom RPM. Multiple-file support was added in
+            version 5.9 of OpenSSH.
+
+            This method should be invoked early on during setup.
+ """
+            # I wasn't able to custom-build openssh-askpass as it depends on X11 and whatnot,
+            # but it's not crucial, so we'll skip it, or rather remove the old version of it.
+ self._yum_remove( 'openssh-askpass' )
+ base_url = 'http://public-artifacts.cghub.ucsc.edu.s3.amazonaws.com/custom-centos-packages/'
+ self._yum_local( is_update=True, rpm_urls=[
+ base_url + 'openssh-6.3p1-1.x86_64.rpm',
+ base_url + 'openssh-clients-6.3p1-1.x86_64.rpm',
+ base_url + 'openssh-server-6.3p1-1.x86_64.rpm' ] )
+ self._run_init_script( 'sshd', 'restart' )
+
+ @fabric_task
+ def _run_init_script( self, name, command='start' ):
+ script_path = self._init_script_path( name )
+ sudo( '%s %s' % (script_path, command) )
diff --git a/core/src/cgcloud/core/cli.py b/core/src/cgcloud/core/cli.py
new file mode 100755
index 0000000..94d3a33
--- /dev/null
+++ b/core/src/cgcloud/core/cli.py
@@ -0,0 +1,135 @@
+# PYTHON_ARGCOMPLETE_OK
+
+from __future__ import absolute_import
+from collections import OrderedDict
+from importlib import import_module
+import logging
+import os
+import sys
+import imp
+from bd2k.util.iterables import concat
+
+from cgcloud.lib.util import Application, app_name, UserError
+import cgcloud.core
+
+log = logging.getLogger( __name__ )
+
+
+def plugin_module( plugin ):
+ """
+ >>> plugin_module('cgcloud.core') # doctest: +ELLIPSIS
+ <module 'cgcloud.core' from '...'>
+ >>> plugin_module('cgcloud.foobar')
+ Traceback (most recent call last):
+ ...
+ UserError: Cannot find plugin module 'cgcloud.foobar'. Running 'pip install cgcloud-foobar' may fix this.
+ """
+ try:
+ return import_module( plugin )
+ except ImportError:
+ raise UserError(
+ "Cannot find plugin module '%s'. Running 'pip install %s' may fix this." % (
+ plugin, plugin.replace( '.', '-' )) )
+
+
+def main( args=None ):
+ """
+ This is the cgcloud entry point. It should be installed via setuptools.setup(entry_points=...)
+ """
+ root_logger = CGCloud.setup_logging( )
+ try:
+ plugins = os.environ.get( 'CGCLOUD_PLUGINS', '' ).strip( )
+ plugins = concat( cgcloud.core,
+ [ plugin_module( plugin ) for plugin in plugins.split( ":" ) if plugin ] )
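+        # For example (hypothetical invocation), CGCLOUD_PLUGINS='cgcloud.jenkins:my.plugins.foo'
+        # would load the roles and commands contributed by the cgcloud.jenkins plugin and by a
+        # custom my.plugins.foo module, in addition to those of cgcloud.core.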
+ app = CGCloud( plugins, root_logger )
+ for plugin in plugins:
+ if hasattr( plugin, 'command_classes' ):
+ for command_class in plugin.command_classes( ):
+ app.add( command_class )
+ app.run( args )
+ except UserError as e:
+ log.error( e.message )
+ sys.exit( 255 )
+
+
+class LoggingFormatter( logging.Formatter ):
+ """
+ A formatter that logs the thread name of secondary threads, but not the main thread.
+ """
+
+ def __init__( self ):
+ super( LoggingFormatter, self ).__init__( "%(threadName)s%(levelname)s: %(message)s" )
+
+ def format( self, record ):
+ if record.threadName == 'MainThread':
+ record.threadName = ''
+ elif record.threadName is not None:
+ record.threadName += ' '
+ return super( LoggingFormatter, self ).format( record )
+
+
+class CGCloud( Application ):
+ """
+ The main CLI application
+ """
+ debug_log_file_name = '%s.{pid}.log' % app_name( )
+
+ def __init__( self, plugins, root_logger=None ):
+ super( CGCloud, self ).__init__( )
+ self.root_logger = root_logger
+ self.option( '--debug',
+ default=False, action='store_true',
+ help='Write debug log to %s in current directory.' % self.debug_log_file_name )
+ self.option( '--script', '-s', metavar='PATH',
+ help='The path to a Python script with additional role definitions.' )
+ self.roles = OrderedDict( )
+ self.cluster_types = OrderedDict( )
+ for plugin in plugins:
+ self._import_plugin_roles( plugin )
+
+ def _import_plugin_roles( self, plugin ):
+ if hasattr( plugin, 'roles' ):
+ for role in plugin.roles( ):
+ self.roles[ role.role( ) ] = role
+ if hasattr( plugin, 'cluster_types' ):
+ for cluster_type in plugin.cluster_types( ):
+ self.cluster_types[ cluster_type.name( ) ] = cluster_type
+
+ def prepare( self, options ):
+ if self.root_logger:
+ if options.debug:
+ self.root_logger.setLevel( logging.DEBUG )
+ file_name = self.debug_log_file_name.format( pid=os.getpid( ) )
+ file_handler = logging.FileHandler( file_name )
+ file_handler.setLevel( logging.DEBUG )
+ file_handler.setFormatter( logging.Formatter(
+ '%(asctime)s: %(levelname)s: %(name)s: %(message)s' ) )
+ self.root_logger.addHandler( file_handler )
+ else:
+ self.silence_boto_and_paramiko( )
+ if options.script:
+ plugin = imp.load_source( os.path.splitext( os.path.basename( options.script ) )[ 0 ],
+ options.script )
+ self._import_plugin_roles( plugin )
+
+ @classmethod
+ def setup_logging( cls ):
+ root_logger = logging.getLogger( )
+        # Only set up logging if it hasn't been done already
+ if len( root_logger.handlers ) == 0:
+ root_logger.setLevel( logging.INFO )
+ stream_handler = logging.StreamHandler( sys.stderr )
+ stream_handler.setFormatter( LoggingFormatter( ) )
+ stream_handler.setLevel( logging.INFO )
+ root_logger.addHandler( stream_handler )
+ return root_logger
+ else:
+ return None
+
+ @classmethod
+ def silence_boto_and_paramiko( cls ):
+ # There are quite a few cases where we expect AWS requests to fail, but it seems
+ # that boto handles these by logging the error *and* raising an exception. We
+ # don't want to confuse the user with those error messages.
+ logging.getLogger( 'boto' ).setLevel( logging.CRITICAL )
+ logging.getLogger( 'paramiko' ).setLevel( logging.WARN )
diff --git a/core/src/cgcloud/core/cloud_init_box.py b/core/src/cgcloud/core/cloud_init_box.py
new file mode 100644
index 0000000..a121d7b
--- /dev/null
+++ b/core/src/cgcloud/core/cloud_init_box.py
@@ -0,0 +1,254 @@
+import logging
+import time
+from StringIO import StringIO
+from abc import abstractmethod
+from functools import partial
+
+import paramiko
+import yaml
+from fabric.operations import put
+from paramiko import Channel
+
+from cgcloud.core.box import Box, fabric_task
+from cgcloud.core.package_manager_box import PackageManagerBox
+from cgcloud.lib.ec2 import ec2_instance_types
+from cgcloud.lib.util import heredoc
+
+log = logging.getLogger( __name__ )
+
+
+class CloudInitBox( PackageManagerBox ):
+ """
+ A box that uses Canonical's cloud-init to initialize the EC2 instance.
+ """
+
+ def _ephemeral_mount_point( self, i ):
+ return '/mnt/ephemeral' + ('' if i == 0 else str( i ))
+
+ @abstractmethod
+ def _get_package_installation_command( self, package ):
+ """
+ Return the command that needs to be invoked to install the given package. The returned
+ command is an array whose first element is a path or file name of an executable while the
+ remaining elements are arguments to that executable.
+ """
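+        # For example, an apt-based subclass might (hypothetically) return
+        # [ 'apt-get', 'install', '-y', package ], while a yum-based one could return
+        # [ 'yum', 'install', '-y', package ].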
+ raise NotImplementedError( )
+
+ def _get_virtual_block_device_prefix( self ):
+ """
+ Return the common prefix of paths representing virtual block devices on this box.
+ """
+ return '/dev/xvd'
+
+ def _populate_cloud_config( self, instance_type, user_data ):
+ """
+ Populate cloud-init's configuration for injection into a newly created instance
+
+        :param user_data: a dictionary that will be serialized into YAML and used as the
+ instance's user-data
+ """
+ # see __wait_for_cloud_init_completion()
+ runcmd = user_data.setdefault( 'runcmd', [ ] )
+ runcmd.append( [ 'touch', '/tmp/cloud-init.done' ] )
+
+ #
+        # Lucid's and Oneiric's cloud-init mounts ephemeral storage on /mnt instead of
+        # /mnt/ephemeral, and Fedora doesn't mount it at all. To keep this consistent across
+        # releases and platforms we should be explicit.
+ #
+        # Also note that Lucid's mountall waits on the disk device. On t1.micro instances this
+        # device doesn't show up, causing Lucid to hang on boot on that instance type. The cleanest
+        # way to handle this is to remove the ephemeral entry on t1.micro instances by specifying [
+ # 'ephemeral0', None ]. Unfortunately, there is a bug [1] in cloud-init that causes the
+ # removal of the entry to be ineffective. The "nobootwait" option might be a workaround
+ # but Fedora stopped supporting it such that now only Ubuntu supports it. A better
+ # workaround is to always have the ephemeral entry in fstab, even on micro instances,
+ # but to exclude the 'auto' option such that when cloud-init runs 'mount -a', it will not
+ # get mounted. We can then mount the filesystem explicitly, except on micro instances.
+ #
+        # The important thing to keep in mind is that when booting instance B from an image
+        # created on an instance A, the fstab from A will be used by B before cloud-init can make
+ # its changes to fstab. This behavior is a major cause of problems and the reason why
+ # micro instances tend to freeze when booting from images created on non-micro instances
+ # since their fstab initially refers to an ephemeral volume that doesn't exist. The
+ # nobootwait and nofail flags are really just attempts at working around this issue.
+ #
+ # [1]: https://bugs.launchpad.net/cloud-init/+bug/1291820
+ #
+ mounts = user_data.setdefault( 'mounts', [ ] )
+ mounts.append(
+ [ 'ephemeral0', self._ephemeral_mount_point( 0 ), 'auto', 'defaults,noauto' ] )
+
+ commands = [ ]
+
+ # On instances booted from a stock image, mdadm will likely be missing. So we should
+ # install it. And we should install it early during boot, before the ephemeral drives are
+ # RAIDed. Furthermore, we need to install mdadm on every instance type, not just the
+ # ones with multiple ephemeral drives, since an image taken from an instance with one
+ # ephemeral volume may be used to spawn an instance with multiple ephemeral volumes.
+ # However, since we don't run `apt-get update`, there is a chance that the package index
+ # is stale and that the installation fails. We therefore also install it during regular
+ # setup.
+ if self.generation == 0:
+ commands.append( self._get_package_installation_command( 'mdadm' ) )
+ num_disks = instance_type.disks
+ device_prefix = self._get_virtual_block_device_prefix( )
+
+ def device_name( i ):
+ return device_prefix + (chr( ord( 'b' ) + i ))
+
+ if num_disks == 0:
+ pass
+ elif instance_type.disk_type == 'HDD':
+ # For HDDs we assume the disk is formatted and we mount each disk separately
+ for i in range( num_disks ):
+ mount_point = self._ephemeral_mount_point( i )
+ if mount_point is not None:
+ commands.extend( [
+ [ 'mkdir', '-p', mount_point ],
+ [ 'mount', device_name( i ), mount_point ] ] )
+ elif num_disks == 1:
+            # The r3 family does not format the ephemeral SSD volume so we will have to do it
+            # manually. Other families may also exhibit that behavior so we will format every SSD
+            # volume. It only takes a second *and* ensures that we have a particular type of
+            # filesystem, i.e. ext4. We don't know what the device will be named (cloud-init
+            # determines this at runtime), so we simply try all possible names.
+ if instance_type.disk_type == 'SSD':
+ commands.append( [ 'mkfs.ext4', '-E', 'nodiscard', device_name( 0 ) ] )
+ mount_point = self._ephemeral_mount_point( 0 )
+ commands.extend( [
+ [ 'mkdir', '-p', mount_point ],
+ [ 'mount', device_name( 0 ), mount_point ] ] )
+ elif num_disks > 1:
+ # RAID multiple SSDs into one, then format and mount it.
+ devices = [ device_name( i ) for i in range( num_disks ) ]
+ mount_point = self._ephemeral_mount_point( 0 )
+ commands.extend( [
+ [ 'mdadm',
+ '--create', '/dev/md0',
+ '--run', # do not prompt for confirmation
+ '--level', '0', # RAID 0, i.e. striped
+ '--raid-devices', str( num_disks ) ] + devices,
+ # Disable auto scan at boot time, which would otherwise mount device on reboot
+ # as md127 before these commands are run.
+ 'echo "AUTO -all" > /etc/mdadm/mdadm.conf',
+ # Copy mdadm.conf into init ramdisk
+ [ 'update-initramfs', '-u' ],
+ [ 'mkfs.ext4', '-E', 'nodiscard', '/dev/md0' ],
+ [ 'mkdir', '-p', mount_point ],
+ [ 'mount', '/dev/md0', mount_point ] ] )
+ else:
+ assert False
+
+ # Prepend commands as a best effort to getting volume preparation done as early as
+ # possible in the boot sequence. Note that CloudInit's 'bootcmd' is run on every boot,
+ # 'runcmd' only once after instance creation.
+ bootcmd = user_data.setdefault( 'bootcmd', [ ] )
+ bootcmd[ 0:0 ] = commands
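+        # For illustration only: on a hypothetical instance type with two ephemeral SSD volumes
+        # (and a non-zero image generation, so no mdadm installation command is prepended), the
+        # user-data assembled here would roughly take this #cloud-config shape:
+        #
+        #   #cloud-config
+        #   mounts:
+        #     - [ephemeral0, /mnt/ephemeral, auto, 'defaults,noauto']
+        #   bootcmd:
+        #     - [mdadm, --create, /dev/md0, --run, --level, '0', --raid-devices, '2', /dev/xvdb, /dev/xvdc]
+        #     - echo "AUTO -all" > /etc/mdadm/mdadm.conf
+        #     - [update-initramfs, -u]
+        #     - [mkfs.ext4, -E, nodiscard, /dev/md0]
+        #     - [mkdir, -p, /mnt/ephemeral]
+        #     - [mount, /dev/md0, /mnt/ephemeral]
+        #   runcmd:
+        #     - [touch, /tmp/cloud-init.done]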
+
+ def _spec_block_device_mapping( self, spec, image ):
+ super( CloudInitBox, self )._spec_block_device_mapping( spec, image )
+ cloud_config = { }
+ instance_type = ec2_instance_types[ spec[ 'instance_type' ] ]
+ self._populate_cloud_config( instance_type, cloud_config )
+ if cloud_config:
+ if 'user_data' in spec:
+ raise ReferenceError( "Conflicting user-data" )
+ user_data = '#cloud-config\n' + yaml.dump( cloud_config )
+ spec[ 'user_data' ] = user_data
+
+ def _on_instance_ready( self, first_boot ):
+ super( CloudInitBox, self )._on_instance_ready( first_boot )
+ if first_boot:
+ self.__wait_for_cloud_init_completion( )
+ if self.generation == 0:
+ self.__add_per_boot_script( )
+
+ def _cloudinit_boot_script( self, name ):
+ return '/var/lib/cloud/scripts/per-boot/cgcloud-' + name
+
+ @fabric_task
+ def __add_per_boot_script( self ):
+ """
+ Ensure that the cloud-init.done file is always created, even on 2nd boot and thereafter.
+        On the first boot of an instance, the .done file creation is performed by the runcmd
+ stanza in cloud-config. On subsequent boots this per-boot script takes over (runcmd is
+ skipped on those boots).
+ """
+ put( remote_path=self._cloudinit_boot_script( 'done' ), mode=0755, use_sudo=True,
+ local_path=StringIO( heredoc( """
+ #!/bin/sh
+ touch /tmp/cloud-init.done""" ) ) )
+
+ def __wait_for_cloud_init_completion( self ):
+ """
+ Wait for cloud-init to finish its job such as to avoid getting in its way. Without this,
+ I've seen weird errors with 'apt-get install' not being able to find any packages.
+
+ Since this method belongs to a mixin, the author of a derived class is responsible for
+ invoking this method before any other setup action.
+ """
+ # /var/lib/cloud/instance/boot-finished is only being written by newer cloud-init releases.
+ # For example, it isn't being written by the cloud-init for Lucid. We must use our own file
+ # created by a runcmd, see _populate_cloud_config()
+ #
+ # This function is called on every node in a cluster during that cluster's creation. For
+ # that reason we want to avoid contention on the lock in @fabric_task that's protecting
+ # the thread-unsafe Fabric code. This contention is aggravated by the fact that,
+        # for some unknown reason, the first SSH connection to a node takes unusually long. With a
+        # lock serialising all calls to this method we would have to wait out that delay for every
+        # node in sequence, in O(N) time. Paramiko, OTOH, is thread-safe, allowing us to do the
+        # waits concurrently, in O(1) time.
+
+ command = ';'.join( [
+ 'echo -n "Waiting for cloud-init to finish ..."',
+ 'while [ ! -e /tmp/cloud-init.done ]',
+ 'do echo -n "."',
+ 'sleep 1 ',
+ 'done ',
+ 'echo "... cloud-init done."' ] )
+
+ self._run( command )
+
+ def _run( self, cmd ):
+ def stream( name, recv_ready, recv, logger ):
+ i = 0
+ r = ''
+ try:
+ while recv_ready( ):
+ s = recv( 1024 )
+ if not s: break
+ i += 1
+ ls = s.splitlines( )
+ # Prepend partial line from previous iteration to first line from this
+ # iteration. Note that the first line may be a partial line, too.
+ ls[ 0 ] = r + ls[ 0 ]
+ # Log all complete lines
+ for l in ls[ :-1 ]:
+ logger( "%s: %s", name, l )
+ r = ls[ -1 ]
+ finally:
+ # No chance to complete the partial line anytime soon, so log it.
+ if r: logger( r )
+ return i
+
+ client = self._ssh_client( )
+ try:
+ with client.get_transport( ).open_session( ) as chan:
+ assert isinstance( chan, Channel )
+ chan.exec_command( cmd )
+ streams = (
+ partial( stream, 'stderr', chan.recv_stderr_ready, chan.recv_stderr, log.warn ),
+ partial( stream, 'stdout', chan.recv_ready, chan.recv, log.info ))
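+                # Keep draining stderr and stdout; only stop once a full pass produced no output
+                # on either channel and the remote command has reported its exit status.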
+ while sum( stream( ) for stream in streams ) or not chan.exit_status_ready( ):
+ time.sleep( paramiko.common.io_sleep )
+ assert 0 == chan.recv_exit_status( )
+ finally:
+ client.close( )
+
+ def _list_packages_to_install( self ):
+ # As a fallback from failed installations of mdadm at boot time, we should install mdadm
+ # unconditionally: https://github.com/BD2KGenomics/cgcloud/issues/194
+ return super( CloudInitBox, self )._list_packages_to_install( ) + [
+ 'mdadm' ]
+
diff --git a/core/src/cgcloud/core/cluster.py b/core/src/cgcloud/core/cluster.py
new file mode 100644
index 0000000..ee25587
--- /dev/null
+++ b/core/src/cgcloud/core/cluster.py
@@ -0,0 +1,147 @@
+import logging
+from abc import ABCMeta, abstractproperty
+
+from cgcloud.core.box import Box
+from cgcloud.lib.util import (abreviated_snake_case_class_name, papply, thread_pool)
+
+log = logging.getLogger( __name__ )
+
+
+class Cluster( object ):
+ """
+ A cluster consists of one leader box and N worker boxes. A box that is part of a cluster is
+ referred to as "node". There is one role (subclass of Box) describing the leader node and
+ another one describing the workers. Leader and worker roles are siblings and their common
+ ancestor--the node role--describes the software deployed on them, which is identical for both
+ leader and workers. The node role is used to create the single image from which the actual
+    leader and workers. The node role is used to create the single image from which the actual
+    nodes will be booted when the cluster is created. In other words, the specialization
+ """
+ __metaclass__ = ABCMeta
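+
+    # A hypothetical concrete cluster type, sketched for illustration only (FooLeaderBox and
+    # FooWorkerBox would be Box subclasses sharing a common node role; neither exists in this
+    # package):
+    #
+    #   class FooCluster( Cluster ):
+    #       @property
+    #       def leader_role( self ):
+    #           return FooLeaderBox
+    #
+    #       @property
+    #       def worker_role( self ):
+    #           return FooWorkerBox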
+
+ def __init__( self, ctx ):
+ super( Cluster, self ).__init__( )
+ self.ctx = ctx
+
+ @abstractproperty
+ def leader_role( self ):
+ """
+ :return: The Box subclass to use for the leader
+ """
+ raise NotImplementedError( )
+
+ @abstractproperty
+ def worker_role( self ):
+ """
+ :return: The Box subclass to use for the workers
+ """
+ raise NotImplementedError( )
+
+ @classmethod
+ def name( cls ):
+ return abreviated_snake_case_class_name( cls, Cluster )
+
+ def apply( self, f, cluster_name=None, ordinal=None, leader_first=True, skip_leader=False,
+ wait_ready=True, operation='operation', pool_size=None, callback=None ):
+ """
+ Apply a callable to the leader and each worker. The callable may be applied to multiple
+ workers concurrently.
+ """
+ # Look up the leader first, even if leader_first is False or skip_leader is True. That
+ # way we fail early if the cluster doesn't exist.
+ leader = self.leader_role( self.ctx )
+ leader.bind( cluster_name=cluster_name, ordinal=ordinal, wait_ready=wait_ready )
+ first_worker = self.worker_role( self.ctx )
+
+ def apply_leader( ):
+ if not skip_leader:
+ log.info( '=== Performing %s on leader ===', operation )
+ result = f( leader )
+ if callback is not None:
+ callback( result )
+
+ def apply_workers( ):
+ log.info( '=== Performing %s on workers ===', operation )
+ workers = first_worker.list( leader_instance_id=leader.instance_id,
+ wait_ready=wait_ready )
+ # zip() creates the singleton tuples that papply() expects
+ papply( f, seq=zip( workers ), pool_size=pool_size, callback=callback )
+
+ if leader_first:
+ apply_leader( )
+ apply_workers( )
+ else:
+ apply_workers( )
+ apply_leader( )
+
+
+class ClusterBox( Box ):
+ """
+ A mixin for a box that is part of a cluster
+ """
+
+ def _set_instance_options( self, options ):
+ super( ClusterBox, self )._set_instance_options( options )
+ self.ebs_volume_size = int( options.get( 'ebs_volume_size' ) or 0 )
+
+ def _get_instance_options( self ):
+ return dict( super( ClusterBox, self )._get_instance_options( ),
+ ebs_volume_size=str( self.ebs_volume_size ),
+ leader_instance_id=self.instance_id)
+
+ @classmethod
+ def _get_node_role( cls ):
+ """
+ Return the role (box class) from which the node image should be created.
+ """
+ # Traverses the inheritance DAG upwards until we find a class that has this class as a
+ # base, i.e. that mixes in this class. The traversal itself only follows the first base
+ # class.
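+        # For example, in a hypothetical hierarchy FooLeaderBox( FooNodeBox, ClusterLeader ) where
+        # FooNodeBox mixes in ClusterBox directly, the loop steps from FooLeaderBox to its first
+        # base FooNodeBox, finds ClusterBox among FooNodeBox's bases and returns FooNodeBox.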
+ while cls not in (ClusterBox, ClusterLeader, ClusterWorker, Box):
+ if ClusterBox in cls.__bases__:
+ return cls
+ else:
+ # noinspection PyMethodFirstArgAssignment
+ cls = cls.__bases__[ 0 ]
+ assert False, "Class %s doesn't have an ancestor that mixes in %s" % (cls, ClusterBox)
+
+ def _image_name_prefix( self ):
+ # The default implementation of this method derives the image name prefix from the
+ # concrete class name. The leader and workers are booted from the node image so we need
+ # to pin the name using the node role.
+ return self._get_node_role( ).role( )
+
+ def _security_group_name( self ):
+ # The default implementation of this method derives the security group name from the
+        # concrete class name. The leader and workers must be assigned the same security
+ # group (because the group allows traffic only within the group) so we need to pin
+ # the name using the node role.
+ return self._get_node_role( ).role( )
+
+
+class ClusterLeader( ClusterBox ):
+ """
+ A mixin for a box that serves as a leader in a cluster
+ """
+ def _get_instance_options( self ):
+ return dict( super( ClusterLeader, self )._get_instance_options( ) )
+
+
+class ClusterWorker( ClusterBox ):
+ """
+    A mixin for a box that serves as a worker in a cluster
+ """
+
+ def __init__( self, ctx ):
+ super( ClusterWorker, self ).__init__( ctx )
+ self.leader_instance_id = None
+
+ def _set_instance_options( self, options ):
+ super( ClusterWorker, self )._set_instance_options( options )
+ self.leader_instance_id = options.get( 'leader_instance_id' )
+ if self.cluster_name is None:
+ self.cluster_name = self.leader_instance_id
+
+ def _get_instance_options( self ):
+ return dict( super( ClusterWorker, self )._get_instance_options( ),
+ leader_instance_id=self.leader_instance_id )
diff --git a/core/src/cgcloud/core/cluster_commands.py b/core/src/cgcloud/core/cluster_commands.py
new file mode 100644
index 0000000..62cc3e3
--- /dev/null
+++ b/core/src/cgcloud/core/cluster_commands.py
@@ -0,0 +1,410 @@
+import logging
+import os
+import sys
+from abc import abstractmethod
+from functools import partial
+
+from bd2k.util.exceptions import panic
+from bd2k.util.expando import Expando
+
+from cgcloud.core.commands import (RecreateCommand,
+ ContextCommand,
+ SshCommandMixin,
+ RsyncCommandMixin)
+from cgcloud.lib.util import (abreviated_snake_case_class_name,
+ UserError,
+ heredoc,
+ thread_pool,
+ allocate_cluster_ordinals)
+
+log = logging.getLogger( __name__ )
+
+
+class ClusterTypeCommand( ContextCommand ):
+ def __init__( self, application ):
+ """
+ Set later, once we have a context.
+ :type: Cluster
+ """
+ super( ClusterTypeCommand, self ).__init__( application )
+ self.option( '--num-threads', metavar='NUM',
+ type=int, default=100,
+ help='The maximum number of tasks to be performed concurrently.' )
+
+ self.option( 'cluster_type', metavar='TYPE',
+ completer=self.completer,
+ help=heredoc( """The type of the cluster to be used. The cluster type is
+ covariant with the role of the leader node. For example, a box performing
+ the 'foo-leader' role will be part of a cluster of type 'foo'.""" ) )
+
+ # noinspection PyUnusedLocal
+ def completer( self, prefix, **kwargs ):
+ return [ cluster_type
+ for cluster_type in self.application.cluster_types.iterkeys( )
+ if cluster_type.startswith( prefix ) ]
+
+ def run_in_ctx( self, options, ctx ):
+ try:
+ cluster_type = self.application.cluster_types[ options.cluster_type ]
+ except KeyError:
+ raise UserError( "Unknown cluster type '%s'" % options.cluster_type )
+ self.run_on_cluster_type( ctx, options, cluster_type )
+
+ @abstractmethod
+ def run_on_cluster_type( self, ctx, options, cluster_type ):
+ raise NotImplementedError( )
+
+
+class CreateClusterCommand( ClusterTypeCommand, RecreateCommand ):
+ """
+ Creates a cluster with one leader and one or more workers.
+ """
+
+ def __init__( self, application ):
+ super( CreateClusterCommand, self ).__init__( application )
+ self.cluster = None
+
+ self.option( '--cluster-name', '-c', metavar='NAME',
+ help=heredoc( """A name for the new cluster. If absent, the instance ID of
+ the master will be used. Cluster names do not need to be unique, but they
+ should be in order to avoid user error.""" ) )
+
+ self.option( '--num-workers', '-s', metavar='NUM',
+ type=int, default=1,
+ help='The number of workers to launch.' )
+
+ self.option( '--ebs-volume-size', '-e', metavar='GB',
+ help=heredoc( """The size in GB of an EBS volume to be attached to each node
+ for persistent data. The volume will be mounted at /mnt/persistent.""" ) )
+
+ self.option( '--leader-on-demand', '-D',
+ default=False, action='store_true',
+                     help=heredoc( """Use this option to ensure that the leader will be an
+                        on-demand instance, even if --spot-bid is given.""" ) )
+
+ self.option( '--share', '-S', metavar='PATH',
+ default=None, dest='share_path',
+ help=heredoc( """The path to a local file or directory for distribution to
+ the cluster. The given file or directory (or the contents of the given
+ directory, if the path ends in a slash) will be placed in the default user's
+ ~/shared directory on each node.""" ) )
+
+ self.option( '--ssh-opts', metavar='OPTS', default=None,
+ help=heredoc( """Additional options to pass to ssh when uploading the files
+ shared via rsync. For more detail refer to cgcloud rsync --help""" ) )
+
+ def preparation_kwargs( self, options, box ):
+ return dict( super( CreateClusterCommand, self ).preparation_kwargs( options, box ),
+ cluster_name=options.cluster_name,
+ ebs_volume_size=options.ebs_volume_size )
+
+ def creation_kwargs( self, options, box ):
+ return dict( super( CreateClusterCommand, self ).creation_kwargs( options, box ),
+ num_instances=options.num_workers )
+
+ def option( self, option_name, *args, **kwargs ):
+ _super = super( CreateClusterCommand, self )
+ if option_name in ('role', '--terminate'):
+ # Suppress the role positional argument since the role is hard-wired and the
+ # --terminate option since it doesn't make sense when creating clusters.
+ return
+ if option_name == '--instance-type':
+ # We want --instance-type to apply to the workers and --leader-instance-type to the
+ # leader. Furthermore, we want --leader-instance-type to default to the value of
+ # --instance-type.
+ assert 'dest' not in kwargs
+ assert args[ 0 ] == '-t'
+ kwargs[ 'help' ] = kwargs[ 'help' ].replace( 'for the box',
+ 'for the leader' )
+ _super.option( '--leader-instance-type', '-T',
+ *args[ 1: ], dest='instance_type', **kwargs )
+ kwargs[ 'help' ] = kwargs[ 'help' ].replace( 'leader', 'workers' )
+ kwargs[ 'dest' ] = 'worker_instance_type'
+ _super.option( option_name, *args, **kwargs )
+
+ def run( self, options ):
+ # Validate shared path
+ if options.share_path is not None:
+ if not os.path.exists( options.share_path ):
+ raise UserError( "No such file or directory: '%s'" % options.share_path )
+ # --leader-instance-type should default to the value of --instance-type
+ if options.instance_type is None:
+ options.instance_type = options.worker_instance_type
+ super( CreateClusterCommand, self ).run( options )
+
+ def run_on_cluster_type( self, ctx, options, cluster_type ):
+ self.cluster = cluster_type( ctx )
+ leader_role = self.cluster.leader_role
+ options.role = leader_role.role( )
+ self.run_on_role( options, ctx, leader_role )
+
+ def run_on_box( self, options, leader ):
+ """
+ :type leader: cgcloud.core.box.Box
+ """
+ log.info( '=== Creating leader ===' )
+ preparation_kwargs = self.preparation_kwargs( options, leader )
+ if options.leader_on_demand:
+ preparation_kwargs = { k: v for k, v in preparation_kwargs.iteritems( )
+ if not k.startswith( 'spot_' ) }
+ spec = leader.prepare( **preparation_kwargs )
+ creation_kwargs = dict( self.creation_kwargs( options, leader ),
+ num_instances=1,
+ # We must always wait for the leader since workers depend on it.
+ wait_ready=True )
+ leader.create( spec, **creation_kwargs )
+ try:
+ self.run_on_creation( leader, options )
+ except:
+ if options.terminate is not False:
+ with panic( log ):
+ leader.terminate( wait=False )
+ raise
+ # Leader is fully setup, even if the code below fails to add workers,
+ # the GrowClusterCommand can be used to recover from that failure.
+ if options.num_workers:
+ log.info( '=== Creating workers ===' )
+ first_worker = self.cluster.worker_role( leader.ctx )
+ preparation_kwargs = dict( self.preparation_kwargs( options, first_worker ),
+ leader_instance_id=leader.instance_id,
+ instance_type=options.worker_instance_type )
+ spec = first_worker.prepare( **preparation_kwargs )
+ with thread_pool( min( options.num_threads, options.num_workers ) ) as pool:
+ workers = first_worker.create( spec,
+ cluster_ordinal=leader.cluster_ordinal + 1,
+ executor=pool.apply_async,
+ **self.creation_kwargs( options, first_worker ) )
+ else:
+ workers = [ ]
+ if options.list:
+ self.list( [ leader ] )
+ self.list( workers, print_headers=False )
+ if not workers:
+ log.warn("This cluster has no workers. You may ssh into the leader now but you should "
+ "use 'cgcloud grow-cluster' to add worker instances before doing real work." )
+ self.log_ssh_hint( options )
+
+ def run_on_creation( self, leader, options ):
+ local_path = options.share_path
+ if local_path is not None:
+ log.info( '=== Copying %s%s to ~/shared on leader ===',
+ 'the contents of ' if local_path.endswith( '/' ) else '', local_path )
+ leader.rsync( args=[ '-r', local_path, ":shared/" ], ssh_opts=options.ssh_opts )
+
+ def ssh_hint( self, options ):
+ hint = super( CreateClusterCommand, self ).ssh_hint( options )
+ hint.options.append( Expando( name='-c', value=options.cluster_name, default=None ) )
+ hint.object = 'cluster'
+ return hint
+
+
+class ClusterCommand( ClusterTypeCommand ):
+ def __init__( self, application ):
+ super( ClusterCommand, self ).__init__( application )
+
+ self.option( '--cluster-name', '-c', metavar='NAME',
+ help=heredoc( """The name of the cluster to operate on. The default is to
+ consider all clusters of the given type regardless of their name,
+ using --ordinal to disambiguate. Note that the cluster name is not
+                        necessarily unique, not even within a specific cluster type; there may be more
+                        than one cluster of a particular name and type.""" ) )
+
+ self.option( '--ordinal', '-o', default=-1, type=int,
+ help=heredoc( """Selects an individual cluster from the list of currently
+ running clusters of the given cluster type and name. Since there is one
+ leader per cluster, this is equal to the ordinal of the leader among all
+ leaders of clusters of the given type and name. The ordinal is a zero-based
+ index into the list of all clusters of the specified type and name,
+ sorted by creation time. This means that the ordinal of a cluster is not
+ fixed, it may change if another cluster of the same type and name is
+ terminated. If the ordinal is negative, it will be converted to a positive
+ ordinal by adding the number of clusters of the specified type. Passing -1,
+                        for example, selects the most recently created cluster.""" ) )
+
+ def run_on_cluster_type( self, ctx, options, cluster_type ):
+ cluster = cluster_type( ctx )
+ self.run_on_cluster( options, ctx, cluster )
+
+ @abstractmethod
+ def run_on_cluster( self, options, ctx, cluster ):
+ raise NotImplementedError( )
+
+
+class GrowClusterCommand( ClusterCommand, RecreateCommand ):
+ """
+ Increase the size of the cluster
+ """
+
+ def __init__( self, application ):
+ super( GrowClusterCommand, self ).__init__( application )
+ self.cluster = None
+ self.option( '--num-workers', '-s', metavar='NUM',
+ type=int, default=1,
+ help='The number of workers to add.' )
+
+ def option( self, option_name, *args, **kwargs ):
+ _super = super( GrowClusterCommand, self )
+ if option_name in ('role', '--terminate'):
+ # Suppress the role positional argument since the role is hard-wired and the
+ # --terminate option since it doesn't make sense here.
+ return
+ if option_name == '--instance-type':
+ assert 'dest' not in kwargs
+ assert args[ 0 ] == '-t'
+ kwargs[ 'help' ] = kwargs[ 'help' ].replace( 'for the box',
+ 'for the workers' )
+ _super.option( option_name, *args, **kwargs )
+
+ def run_on_cluster( self, options, ctx, cluster ):
+ self.cluster = cluster
+ options.role = self.cluster.worker_role.role( )
+ self.run_on_role( options, ctx, self.cluster.worker_role )
+
+ def creation_kwargs( self, options, box ):
+ return dict( super( GrowClusterCommand, self ).creation_kwargs( options, box ),
+ num_instances=options.num_workers )
+
+ def run_on_box( self, options, first_worker ):
+ """
+ :param cgcloud.core.box.Box first_worker:
+ """
+ log.info( '=== Binding to leader ===' )
+ leader = self.cluster.leader_role( self.cluster.ctx )
+ leader.bind( cluster_name=options.cluster_name,
+ ordinal=options.ordinal,
+ wait_ready=False )
+ log.info( '=== Creating workers ===' )
+ workers = first_worker.list( leader_instance_id=leader.instance_id )
+ used_cluster_ordinals = set( w.cluster_ordinal for w in workers )
+ assert len( used_cluster_ordinals ) == len( workers ) # check for collisions
+ assert 0 not in used_cluster_ordinals # master has 0
+ used_cluster_ordinals.add( 0 ) # to make the math easier
+ cluster_ordinal = allocate_cluster_ordinals( num=options.num_workers,
+ used=used_cluster_ordinals )
+ first_worker.unbind( ) # list() bound it
+ spec = first_worker.prepare( leader_instance_id=leader.instance_id,
+ cluster_name=leader.cluster_name,
+ **self.preparation_kwargs( options, first_worker ) )
+ with thread_pool( min( options.num_threads, options.num_workers ) ) as pool:
+ workers = first_worker.create( spec,
+ cluster_ordinal=cluster_ordinal,
+ executor=pool.apply_async,
+ **self.creation_kwargs( options, first_worker ) )
+ if options.list:
+ self.list( workers )
+ if not workers:
+ log.warn( 'No workers were added to the cluster.' )
+
+
+
+class ApplyClusterCommand( ClusterCommand ):
+ """
+ A command that applies an operation to a running cluster.
+ """
+
+ def __init__( self, application ):
+ super( ApplyClusterCommand, self ).__init__( application )
+ self.option( '--skip-leader', '-L', default=False, action='store_true',
+ help=heredoc( """Don't perform the operation on the leader.""" ) )
+
+
+class ClusterLifecycleCommand( ApplyClusterCommand ):
+ """
+ A command that runs a simple method on each node in a cluster
+ """
+ leader_first = True
+ wait_ready = False
+
+ def run_on_cluster( self, options, ctx, cluster ):
+ cluster.apply( partial( self.run_on_node, options ),
+ cluster_name=options.cluster_name,
+ ordinal=options.ordinal,
+ leader_first=self.leader_first,
+ skip_leader=options.skip_leader,
+ wait_ready=self.wait_ready,
+ pool_size=options.num_threads,
+ operation=self.operation( ) + '()' )
+
+ def run_on_node( self, options, node ):
+ getattr( node, self.operation( ) )( )
+
+ def operation( self ):
+ return abreviated_snake_case_class_name( self.__class__, ClusterCommand )
+
+
+class StopClusterCommand( ClusterLifecycleCommand ):
+ """
+ Stop all nodes of a cluster
+ """
+ leader_first = False
+
+
+class StartClusterCommand( ClusterLifecycleCommand ):
+ """
+ Start all nodes of a cluster
+ """
+ leader_first = True
+
+
+class TerminateClusterCommand( ClusterLifecycleCommand ):
+ """
+ Terminate all nodes of a cluster
+ """
+ leader_first = False
+
+ def __init__( self, application ):
+ super( TerminateClusterCommand, self ).__init__( application )
+ self.option( '--quick', '-Q', default=False, action='store_true',
+ help="""Exit immediately after termination request has been made, don't wait
+ until the cluster is terminated.""" )
+
+ def run_on_node( self, options, node ):
+ node.terminate( wait=not options.quick )
+
+
+# NB: The ordering of bases affects ordering of positionals
+
+class SshClusterCommand( SshCommandMixin, ApplyClusterCommand ):
+ """
+ Run a command via SSH on each node of a cluster. The command is run on the leader first,
+ followed by the workers, serially by default or optionally in parallel.
+ """
+
+ def __init__( self, application ):
+ super( SshClusterCommand, self ).__init__( application )
+ self.option( '--parallel', '-P', default=False, action='store_true',
+ help=heredoc( """Run command on the workers in parallel. Note that this
+ doesn't work if SSH or the command itself prompts for input. This will
+ likely be the case on the first connection attempt when SSH typically
+ prompts for confirmation of the host key. An insecure work-around is to pass
+ "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no".""" ) )
+
+ def run_on_cluster( self, options, ctx, cluster ):
+ exit_codes = [ ]
+ cluster.apply( partial( self.ssh, options ),
+ cluster_name=options.cluster_name,
+ ordinal=options.ordinal,
+ leader_first=True,
+ skip_leader=options.skip_leader,
+ pool_size=options.num_threads if options.parallel else 0,
+ wait_ready=False,
+ callback=exit_codes.append )
+ if any( exit_code for exit_code in exit_codes ):
+ sys.exit( 2 )
+
+
+class RsyncClusterCommand( RsyncCommandMixin, ApplyClusterCommand ):
+ """
+    Run rsync against each node in a cluster. The rsync program will be run against the master first,
+ followed by all workers in parallel. To avoid being prompted for confirmation of the host
+ key, use --ssh-opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no".
+ """
+
+ def run_on_cluster( self, options, ctx, cluster ):
+ cluster.apply( partial( self.rsync, options ),
+ cluster_name=options.cluster_name,
+ ordinal=options.ordinal,
+ leader_first=True,
+ skip_leader=options.skip_leader,
+ pool_size=options.num_threads,
+ wait_ready=False )
diff --git a/core/src/cgcloud/core/commands.py b/core/src/cgcloud/core/commands.py
new file mode 100644
index 0000000..3cc2e9d
--- /dev/null
+++ b/core/src/cgcloud/core/commands.py
@@ -0,0 +1,885 @@
+from __future__ import print_function
+
+import argparse
+import functools
+import logging
+import os
+import re
+import sys
+from abc import abstractmethod
+from operator import itemgetter
+
+from bd2k.util.exceptions import panic
+from bd2k.util.expando import Expando
+from bd2k.util.iterables import concat
+from boto.ec2.blockdevicemapping import BlockDeviceType
+from boto.ec2.connection import EC2Connection
+from boto.ec2.group import Group
+from fabric.operations import prompt
+from tabulate import tabulate
+
+from cgcloud.core.box import Box
+from cgcloud.lib.context import Context
+from cgcloud.lib.ec2 import ec2_instance_types
+from cgcloud.lib.util import Application, heredoc
+from cgcloud.lib.util import UserError, Command
+
+log = logging.getLogger( __name__ )
+
+
+class ContextCommand( Command ):
+ """
+ A command that runs in a context. Contexts encapsulate the necessary environment for
+ boxes to run in. The most important aspect of a context is its namespace. Namespaces group
+ boxes and other resources into isolated groups.
+ """
+
+ @abstractmethod
+ def run_in_ctx( self, options, ctx ):
+ """
+ Run this command in the given context.
+
+ :type ctx: Context
+ """
+ raise NotImplementedError( )
+
+ def __init__( self, application, **kwargs ):
+ self.default_namespace = os.environ.get( 'CGCLOUD_NAMESPACE', '/__me__/' )
+ self.default_zone = os.environ.get( 'CGCLOUD_ZONE', None )
+ super( ContextCommand, self ).__init__( application, **kwargs )
+
+ self.option( '--zone', '-z', metavar='ZONE',
+ default=self.default_zone, dest='availability_zone',
+ required=not bool( self.default_zone ),
+ help=heredoc( """The name of the EC2 availability zone to operate in,
+ e.g. us-east-1b, us-west-1b or us-west-2c etc. This argument implies the AWS
+ region to run in. The value of the environment variable CGCLOUD_ZONE,
+ if that variable is present, determines the default.""" ) )
+
+ self.option( '--namespace', '-n', metavar='PREFIX', default=self.default_namespace,
+                     help=heredoc( """Optional prefix for naming EC2 resources like instances,
+ images, volumes, etc. Use this option to create a separate namespace in
+ order to avoid collisions, e.g. when running tests. A namespace begins with
+ a slash, followed by zero or more names, each name followed by a slash. Note
+ that this implies that the namespace begins and ends with a slash. Each name
+                        must begin with a digit or lowercase letter followed by zero or more
+ digits, lowercase letters, periods, underscores or dashes. The value of the
+ environment variable CGCLOUD_NAMESPACE, if that variable is present,
+ overrides the default. The string __me__ anywhere in the namespace will be
+ replaced by the name of the IAM user whose credentials are used to issue
+ requests to AWS. If the name of that IAM user contains the @ character,
+                        anything after the first occurrence of that character will be discarded
+ before the substitution is done.""" ) )
+
+ def run( self, options ):
+ zone = options.availability_zone
+ namespace = options.namespace
+ ctx = None
+ try:
+ ctx = Context( availability_zone=zone, namespace=namespace )
+ except ValueError as e:
+ raise UserError( cause=e )
+ except:
+ # print the namespace without __me__ substituted
+ log.error( "An error occurred. Using zone '%s' and namespace '%s'", zone, namespace )
+ raise
+ else:
+ # print the namespace with __me__ substituted
+ log.info( "Using zone '%s' and namespace '%s'", ctx.availability_zone, ctx.namespace )
+ return self.run_in_ctx( options, ctx )
+ finally:
+ if ctx is not None: ctx.close( )
+
+
+class RoleCommand( ContextCommand ):
+ """
+ An abstract command that targets boxes of a particular role. Note that there may be more
+ than one box per role. To target a specific box, InstanceCommand might be a better choice.
+ """
+
+ def __init__( self, application, **kwargs ):
+ super( RoleCommand, self ).__init__( application, **kwargs )
+ self.option( 'role', metavar='ROLE', completer=self.completer,
+ help=heredoc( """The name of the role. Use the list-roles command to show
+ all available roles.""" ) )
+
+ # noinspection PyUnusedLocal
+ def completer( self, prefix, **kwargs ):
+ return [ role for role in self.application.roles.iterkeys( ) if role.startswith( prefix ) ]
+
+ def run_in_ctx( self, options, ctx ):
+ role = self.application.roles.get( options.role )
+ if role is None: raise UserError( "No such role: '%s'" % options.role )
+ return self.run_on_role( options, ctx, role )
+
+ @abstractmethod
+ def run_on_role( self, options, ctx, role ):
+ """
+ :type options: dict
+ :type ctx: Context
+ :type role: type[Box]
+ """
+ raise NotImplementedError( )
+
+
+class BoxCommand( RoleCommand ):
+ """
+ An abstract command that runs on a box, i.e. an instance of a role class.
+ """
+
+ def run_on_role( self, options, ctx, role ):
+ box = role( ctx )
+ return self.run_on_box( options, box )
+
+ @abstractmethod
+ def run_on_box( self, options, box ):
+ """
+ Execute this command using the specified parsed command line options on the specified box.
+
+ :type options: dict
+ :type box: Box
+ """
+ raise NotImplementedError( )
+
+ def list( self, boxes, print_header=True ):
+ columns = """
+ cluster_name
+ role_name
+ cluster_ordinal
+ private_ip_address
+ ip_address
+ instance_id
+ instance_type
+ launch_time
+ state
+ zone""".split( )
+
+ if print_header:
+ header = list( columns )
+ header.insert( 2, 'ordinal' )
+ print( '\t'.join( header ) )
+
+ for ordinal, box in enumerate( boxes ):
+ row = [ getattr( box, column ) for column in columns ]
+ row.insert( 2, ordinal )
+ print( '\t'.join( str( column ) for column in row ) )
+
+
+class InstanceCommand( BoxCommand ):
+ """
+ A command that runs on a box bound to a specific EC2 instance.
+ """
+
+ def __init__( self, application, **kwargs ):
+ super( InstanceCommand, self ).__init__( application, **kwargs )
+ self.option( '--cluster-name', '-c', metavar='NAME',
+ help=heredoc( """This option can be used to restrict the selection to boxes
+ that are part of a cluster of the given name. Boxes that are not part of a
+ cluster use their own instance id as the cluster name.""" ) )
+        self.begin_mutex( )
+ self.option( '--ordinal', '-o', default=-1, type=int,
+ help=heredoc( """Selects an individual box from the list of boxes performing
+ the specified role in a cluster of the given name. The ordinal is a
+ zero-based index into the list of all boxes performing the specified role,
+ sorted by creation time. This means that the ordinal of a box is not fixed,
+ it may change if another box performing the specified role is terminated. If
+ the ordinal is negative, it will be converted to a positive ordinal by
+ adding the number of boxes performing the specified role. Passing -1,
+ for example, selects the most recently created box.""" ) )
+ self.option( '--instance-id', '-I', default=None, type=str,
+ help=heredoc( """Selects an individual instance. When combined with
+ --cluster-name, the specified instance needs to belong to a cluster of the
+ specified name or an error will be raised.""" ) )
+        self.end_mutex( )
+
+ wait_ready = True
+
+ def run_on_box( self, options, box ):
+ if options.instance_id:
+ # Mutual exclusivity is enforced by argparse but we need to unset the default value
+ # for the mutual exclusive options.
+ options.ordinal = None
+ box.bind( ordinal=options.ordinal,
+ cluster_name=options.cluster_name,
+ wait_ready=self.wait_ready,
+ instance_id=options.instance_id )
+ self.run_on_instance( options, box )
+
+ @abstractmethod
+ def run_on_instance( self, options, box ):
+ raise NotImplementedError( )
+
+
+class ListCommand( BoxCommand ):
+ """
+ List the boxes performing a particular role.
+ """
+
+ def __init__( self, application ):
+ super( ListCommand, self ).__init__( application )
+ self.option( '--cluster-name', '-c', metavar='NAME',
+ help='Only list boxes belonging to a cluster of the given name.' )
+
+ def run_on_box( self, options, box ):
+ boxes = box.list( cluster_name=options.cluster_name )
+ self.list( boxes )
+
+
+class UserCommandMixin( Command ):
+ """
+ A command that runs as a given user
+ """
+
+ def __init__( self, application, **kwargs ):
+ super( UserCommandMixin, self ).__init__( application, **kwargs )
+ self.begin_mutex( )
+ self.option( '--login', '-l', default=None, metavar='USER', dest='user',
+ help=heredoc( """Name of user to login as. The default depends on the role,
+ for most roles the default is the administrative user. Roles that define a
+ second less privileged application user will default to that user. Can't be
+ used together with -a, --admin.""" ) )
+ self.option( '--admin', '-a', default=False, action='store_true',
+ help=heredoc( """Force logging in as the administrative user. Can't be used
+ together with -l, --login.""" ) )
+ self.end_mutex( )
+
+ @staticmethod
+ def _user( box, options ):
+ return box.admin_account( ) if options.admin else options.user or box.default_account( )
+
+
+class SshCommandMixin( UserCommandMixin ):
+ def __init__( self, application ):
+ super( SshCommandMixin, self ).__init__( application )
+ self.option( 'command', metavar='...', nargs=argparse.REMAINDER, default=[ ],
+ help=heredoc( """Additional arguments to pass to ssh. This can be anything
+ that one would normally pass to the ssh program excluding user name and host
+ but including, for example, the remote command to execute.""" ) )
+
+ def ssh( self, options, box ):
+ return box.ssh( user=self._user( box, options ), command=options.command )
+
+
+# NB: The ordering of bases affects ordering of positionals
+
+class SshCommand( SshCommandMixin, InstanceCommand ):
+ """
+ Start an interactive SSH session on a box.
+ """
+
+ def run_on_instance( self, options, box ):
+ status = self.ssh( options, box )
+ if status != 0:
+ sys.exit( status )
+
+
+class RsyncCommandMixin( UserCommandMixin ):
+ """
+ Rsync to or from the box
+ """
+
+ def __init__( self, application ):
+ super( RsyncCommandMixin, self ).__init__( application )
+ self.option( '--ssh-opts', '-e', metavar='OPTS', default=None,
+ help=heredoc( """Additional options to pass to ssh. Note that if OPTS starts
+ with a dash you must use the long option followed by an equal sign. For
+                        example, to run ssh in verbose mode, use --ssh-opts=-v. If OPTS is to include
+ spaces, it must be quoted to prevent the shell from breaking it up. So to
+                        run ssh in verbose mode and log to syslog, you would use --ssh-opts='-v
+ -y'.""" ) )
+ self.option( 'args', metavar='...', nargs=argparse.REMAINDER, default=[ ],
+ help=heredoc( """Command line options for rsync(1). The remote path argument
+ must be prefixed with a colon. For example, 'cgcloud.py rsync foo -av :bar
+ .' would copy the file 'bar' from the home directory of the admin user on
+ the box 'foo' to the current directory on the local machine.""" ) )
+
+ def rsync( self, options, box ):
+ box.rsync( options.args, user=self._user( box, options ), ssh_opts=options.ssh_opts )
+
+
+# NB: The ordering of bases affects ordering of positionals
+
+class RsyncCommand( RsyncCommandMixin, InstanceCommand ):
+ def run_on_instance( self, options, box ):
+ self.rsync( options, box )
+
+
+class ImageCommand( InstanceCommand ):
+ """
+ Create an AMI image of a box performing a given role. The box must be stopped.
+ """
+
+ wait_ready = False
+
+ def run_on_instance( self, options, box ):
+ box.image( )
+
+
+class ShowCommand( InstanceCommand ):
+ """
+ Display the EC2 attributes of the box.
+ """
+
+ def print_object( self, o, visited=set( ), depth=1 ):
+ _id = id( o )
+ if not _id in visited:
+ visited.add( _id )
+ self.print_dict( o.__dict__, visited, depth )
+ visited.remove( _id )
+ if depth == 1: sys.stdout.write( '\n' )
+
+ def print_dict( self, d, visited, depth ):
+ for k, v in sorted( d.iteritems( ), key=itemgetter( 0 ) ):
+ k = str( k )
+ if k[ 0:1 ] != '_' \
+ and k != 'connection' \
+ and not isinstance( v, EC2Connection ):
+ sys.stdout.write( '\n%s%s: ' % ('\t' * depth, k) )
+ if isinstance( v, str ):
+ sys.stdout.write( v.strip( ) )
+                elif isinstance( v, unicode ):
+ sys.stdout.write( v.encode( 'utf8' ).strip( ) )
+ elif hasattr( v, 'iteritems' ):
+ self.print_dict( v, visited, depth + 1 )
+ elif hasattr( v, '__iter__' ):
+ self.print_dict( dict( enumerate( v ) ), visited, depth + 1 )
+ elif isinstance( v, BlockDeviceType ) \
+ or isinstance( v, Group ):
+ self.print_object( v, visited, depth + 1 )
+ else:
+ sys.stdout.write( repr( v ) )
+
+ wait_ready = False
+
+ def run_on_instance( self, options, box ):
+ self.print_object( box.instance )
+
+
+class LifecycleCommand( InstanceCommand ):
+ """
+ Transition an instance box into a particular state.
+ """
+ wait_ready = False
+
+ def run_on_instance( self, options, box ):
+ getattr( box, self.name( ) )( )
+
+
+class StartCommand( LifecycleCommand ):
+ """
+    Start the box, i.e. bring it from the stopped state to the running state.
+ """
+ pass
+
+
+class StopCommand( LifecycleCommand ):
+ """
+    Stop the box, i.e. bring it from the running state to the stopped state.
+ """
+ pass
+
+
+class RebootCommand( LifecycleCommand ):
+ """
+ Stop the box, then start it again.
+ """
+ pass
+
+
+class TerminateCommand( LifecycleCommand ):
+ """
+    Terminate the box, i.e. delete it permanently.
+ """
+
+ def __init__( self, application, **kwargs ):
+ super( TerminateCommand, self ).__init__( application, **kwargs )
+ self.option( '--quick', '-Q', default=False, action='store_true',
+                     help=heredoc( """Exit immediately after the termination request has been made,
+ don't wait until the box is terminated.""" ) )
+
+ def run_on_instance( self, options, box ):
+ box.terminate( wait=not options.quick )
+
+
+class ListImagesCommand( BoxCommand ):
+ """
+ List the AMI images that were created from boxes performing a particular role.
+ """
+
+ def run_on_box( self, options, box ):
+ for ordinal, image in enumerate( box.list_images( ) ):
+ print( '{name}\t{ordinal}\t{id}\t{state}'.format( ordinal=ordinal,
+ **image.__dict__ ) )
+
+
+class CreationCommand( BoxCommand ):
+ def __init__( self, application ):
+ super( CreationCommand, self ).__init__( application )
+ default_ec2_keypairs = os.environ.get( 'CGCLOUD_KEYPAIRS', '__me__' ).split( )
+ self.option( '--keypairs', '-k', metavar='NAME',
+ dest='ec2_keypair_names', nargs='+',
+ default=default_ec2_keypairs,
+ help=heredoc( """The names of EC2 key pairs whose public key is to be
+ injected into the box to facilitate SSH logins. For the first listed
+                        argument, the so-called primary key pair, a matching private key needs to be
+ present locally. All other arguments may use shell-style globs in which case
+ every key pair whose name matches one of the globs will be deployed to the
+ box. The cgcloudagent program that will typically be installed on a box
+ keeps the deployed list of authorized keys up to date in case matching keys
+ are added or removed from EC2. The value of the environment variable
+ CGCLOUD_KEYPAIRS, if that variable is present, overrides the default for
+ this option. The string __me__ anywhere in an argument will be substituted
+ with the name of the IAM user whose credentials are used to issue requests
+ to AWS. An argument beginning with a single @ will be looked up as the name
+ of an IAM user. If that user exists, the name will be used as the name of a
+ key pair. Otherwise an exception is raised. An argument beginning with @@
+ will be looked up as an IAM group and the name of each user in that group
+ will be used as the name of a keypair. Note that the @ and @@ substitutions
+ depend on the convention that the user and the corresponding key pair have
+ the same name. They only require the respective user or group to exist,
+ while the key pair may be missing. If such a missing key pair is later
+                        added, cgcloudagent will automatically add that key pair's public key to the
+                        list of SSH keys authorized to log in to the box. Shell-style globs cannot
+ be combined with @ or @@ substitutions within one argument.""" ) )
+
+ self.option( '--instance-type', '-t', metavar='TYPE', choices=ec2_instance_types.keys( ),
+ default=os.environ.get( 'CGCLOUD_INSTANCE_TYPE', None ),
+ help=heredoc( """The type of EC2 instance to launch for the box,
+ e.g. t2.micro, m3.small, m3.medium, or m3.large etc. The value of the
+ environment variable CGCLOUD_INSTANCE_TYPE, if that variable is present,
+ overrides the default, an instance type appropriate for the role.""" ) )
+
+ self.option( '--virtualization-type', metavar='TYPE', choices=Box.virtualization_types,
+ help=heredoc( """The virtualization type to be used for the instance. This
+ affects the choice of image (AMI) the instance is created from. The default
+ depends on the instance type, but generally speaking, 'hvm' will be used for
+ newer instance types.""" ) )
+
+ self.option( '--spot-bid', metavar='AMOUNT', type=float,
+ help=heredoc( """The maximum price to pay for the specified instance type,
+ in dollars per hour as a floating point value, 1.23 for example. Only bids
+ under double the instance type's average price for the past week will be
+ accepted. By default on-demand instances are used. Note that some instance
+ types are not available on the spot market!""" ) )
+
+ self.option( '--vpc', metavar='VPC_ID', type=str, dest='vpc_id',
+ help=heredoc( """The ID of a VPC to create the instance and associated
+ security group in. If this option is absent and the AWS account has a
+ default VPC, the default VPC will be used. This is the most common case. If
+ this option is absent and the AWS account has EC2 Classic enabled and the
+ selected instance type supports EC2 classic mode, no VPC will be used. If
+ this option is absent and the AWS account has no default VPC and an instance
+ type that only supports VPC is used, an exception will be raised.""" ) )
+
+ self.option( '--subnet', metavar='SUBNET_ID', type=str, dest='subnet_id',
+ help=heredoc( """The ID of a subnet to allocate the instance's private IP
+ address from. Can't be combined with --spot-auto-zone. The specified subnet
+ must belong to the specified VPC (or the default VPC if none was given) and
+ reside in the availability zone given via CGCLOUD_ZONE or --zone. If this
+ option is absent, cgcloud will attempt to choose a subnet automatically.""" ) )
+
+ self.option( '--spot-launch-group', metavar='NAME',
+ help=heredoc( """The name of an EC2 spot instance launch group. If
+                        specified, the spot request will only be fulfilled once all instances in
+ the group can be launched. Furthermore, if any instance in the group needs
+ to be terminated by Amazon, so will the remaining ones, even if their bid is
+ higher than the market price.""" ) )
+
+ self.option( '--spot-auto-zone', default=False, action='store_true',
+ help=heredoc( """Ignore --zone/CGCLOUD_ZONE and instead choose the best EC2
+ availability zone for spot instances based on a heuristic.""" ) )
+
+ self.option( '--spot-timeout', metavar='SECONDS', type=float,
+ help=heredoc( """The maximum time to wait for spot instance requests to
+ enter the active state. Requests that are not active when the timeout fires
+ will be cancelled.""" ) )
+
+ self.option( '--spot-tentative', default=False, action='store_true',
+ help=heredoc( """Give up on a spot request at the earliest indication of it
+ not being fulfilled immediately.""" ) )
+
+ self.option( '--list', default=False, action='store_true',
+ help=heredoc( """List all instances created by this command on success.""" ) )
+
+ option_name_re = re.compile( r'^[A-Za-z][0-9A-Za-z_]*$' )
+
+ def option( o ):
+ l = o.split( '=', 1 )
+ if len( l ) != 2:
+ raise ValueError( "An option must be of the form NAME=VALUE. '%s' is not." % o )
+ k, v = l
+ if not option_name_re.match( k ):
+ raise ValueError( "An option name must start with a letter and contain only "
+ "letters, digits and underscore. '%s' does not." % o )
+ return k, v
+
+ self.option( '--option', '-O', metavar='NAME=VALUE',
+ type=option, action='append', default=[ ], dest='role_options',
+ help=heredoc( """Set a role-specific option for the instance. To see a list
+ of options for a role, use the list-options command.""" ) )
+
+ self.begin_mutex( )
+
+ self.option( '--terminate', '-T',
+ default=None, action='store_true',
+ help=heredoc( """Terminate the box when setup is complete. The default is to
+ leave the box running except when errors occur.""" ) )
+
+ self.option( '--never-terminate', '-N',
+ default=None, dest='terminate', action='store_false',
+ help=heredoc( """Never terminate the box, even after errors. This may be
+ useful for a post-mortem diagnosis.""" ) )
+
+ self.end_mutex( )
+
+ @abstractmethod
+ def run_on_creation( self, box, options ):
+ """
+ Run on the given box after it was created.
+ """
+ raise NotImplementedError( )
+
+ def preparation_kwargs( self, options, box ):
+ """
+ Return dict with keyword arguments to be passed box.prepare()
+ """
+ role_options = box.get_role_options( )
+ supported_options = set( option.name for option in role_options )
+ actual_options = set( name for name, value in options.role_options )
+ for name in actual_options - supported_options:
+            raise UserError( "Option '%s' is not supported by role '%s'." % (name, box.role( )) )
+ resolve_me = functools.partial( box.ctx.resolve_me, drop_hostname=False )
+ return dict( options.role_options,
+ ec2_keypair_globs=map( resolve_me, options.ec2_keypair_names ),
+ instance_type=options.instance_type,
+ virtualization_type=options.virtualization_type,
+ vpc_id=options.vpc_id,
+ subnet_id=options.subnet_id,
+ spot_bid=options.spot_bid,
+ spot_launch_group=options.spot_launch_group,
+ spot_auto_zone=options.spot_auto_zone )
+
+ def creation_kwargs( self, options, box ):
+ return dict( terminate_on_error=options.terminate is not False,
+ spot_timeout=options.spot_timeout,
+ spot_tentative=options.spot_tentative )
+
+ def run_on_box( self, options, box ):
+ """
+ :type box: Box
+ """
+ spec = box.prepare( **self.preparation_kwargs( options, box ) )
+ box.create( spec, **self.creation_kwargs( options, box ) )
+ try:
+ self.run_on_creation( box, options )
+ except:
+ if options.terminate is not False:
+ with panic( log ):
+ box.terminate( wait=False )
+ raise
+ else:
+ if options.list:
+ self.list( [ box ] )
+ if options.terminate is True:
+ box.terminate( )
+ else:
+ self.log_ssh_hint( options )
+
+ # noinspection PyUnresolvedReferences
+ def log_ssh_hint( self, options ):
+ hint = self.ssh_hint( options )
+
+ def opt( name, value, default ):
+ return name + ' ' + value if value != default else None
+
+ cmd = concat( hint.executable,
+ hint.command,
+ (opt( **option ) for option in hint.options),
+ hint.args )
+ cmd = ' '.join( filter( None, cmd ) )
+ log.info( "Run '%s' to start using this %s.", cmd, hint.object )
+
+ def ssh_hint( self, options ):
+ x = Expando
+ return x( executable=os.path.basename( sys.argv[ 0 ] ),
+ command='ssh',
+ options=[
+ x( name='-n', value=options.namespace, default=self.default_namespace ),
+ x( name='-z', value=options.availability_zone, default=self.default_zone ) ],
+ args=[ options.role ],
+ object='box' )
+
+
+class RegisterKeyCommand( ContextCommand ):
+ """
+ Upload an OpenSSH public key for future injection into boxes. The public key will be imported
+ into EC2 as a keypair and stored verbatim in S3.
+ """
+
+ def __init__( self, application, **kwargs ):
+ super( RegisterKeyCommand, self ).__init__( application, **kwargs )
+ self.option( 'ssh_public_key', metavar='KEY_FILE',
+ help=heredoc( """Path of file containing the SSH public key to upload to the
+ EC2 keypair.""" ) )
+ self.option( '--force', '-F', default=False, action='store_true',
+ help='Overwrite potentially existing EC2 key pair' )
+ self.option( '--keypair', '-k', metavar='NAME',
+ dest='ec2_keypair_name', default='__me__',
+ help=heredoc( """The desired name of the EC2 key pair. The name should
+                        associate the key with you in a way that is obvious to other users in
+ your organization. The string __me__ anywhere in the key pair name will be
+ replaced with the name of the IAM user whose credentials are used to issue
+ requests to AWS.""" ) )
+
+ def run_in_ctx( self, options, ctx ):
+ with open( options.ssh_public_key ) as f:
+ ssh_public_key = f.read( )
+ try:
+ ctx.register_ssh_pubkey( ec2_keypair_name=ctx.resolve_me( options.ec2_keypair_name,
+ drop_hostname=False ),
+ ssh_pubkey=ssh_public_key,
+ force=options.force )
+ except ValueError as e:
+ raise UserError( cause=e )
+
+
+class ListRolesCommand( Command ):
+ """
+    List available roles. A role is a template for a box. A box is a virtual machine in EC2,
+ also known as an instance.
+ """
+
+ def run( self, options ):
+        print( tabulate( (name, (role.__doc__ or '').strip().split('\n')[0].strip())
+ for name, role in self.application.roles.iteritems( ) ) )
+ log.info( "If you are expecting to see more roles listed above, you may need to set/change "
+ "the CGCLOUD_PLUGINS environment variable." )
+
+
+# noinspection PyAbstractClass
+class ImageReferenceCommand( Command ):
+ """
+ Any command that accepts an image ordinal or AMI ID.
+
+ >>> app = Application()
+ >>> class FooCmd( ImageReferenceCommand ):
+ ... long_image_option = '--foo'
+ ... short_image_option = '-f'
+ ... def run(self, options):
+ ... pass
+ >>> cmd = FooCmd( app )
+ >>> cmd.ordinal_or_ami_id( 'bar' )
+ Traceback (most recent call last):
+ ...
+ ValueError
+ >>> cmd.ordinal_or_ami_id( '' )
+ Traceback (most recent call last):
+ ...
+ ValueError
+ >>> cmd.ordinal_or_ami_id( '-1')
+ -1
+ >>> cmd.ordinal_or_ami_id( 'ami-4dcced7d')
+ 'ami-4dcced7d'
+ >>> cmd.ordinal_or_ami_id( 'ami-4dCCED7D')
+ 'ami-4dcced7d'
+ >>> cmd.ordinal_or_ami_id( 'amI-4dCCED7D')
+ Traceback (most recent call last):
+ ...
+ ValueError
+ >>> cmd.ordinal_or_ami_id( 'ami-4dcced7')
+ Traceback (most recent call last):
+ ...
+ ValueError
+ >>> cmd.ordinal_or_ami_id( 'ami-4dCCED7DD')
+ Traceback (most recent call last):
+ ...
+ ValueError
+ """
+ ami_id_re = re.compile( r'^ami-([0-9a-fA-F]{8})$' )
+
+ def ordinal_or_ami_id( self, s ):
+ try:
+ return int( s )
+ except ValueError:
+ if self.ami_id_re.match( s ):
+ return s.lower( )
+ else:
+ raise ValueError( )
+
+ long_image_option = None
+ short_image_option = None
+
+ def __init__( self, application ):
+ super( ImageReferenceCommand, self ).__init__( application )
+ self.option( self.long_image_option, self.short_image_option, metavar='IMAGE',
+ type=self.ordinal_or_ami_id, default=-1, # default to the last one
+ help=heredoc( """An image ordinal, i.e. the index of an image in the list of
+ images for the given role, sorted by creation time. Use the list-images
+ command to print a list of images for a given role. If the ordinal is
+ negative, it will be converted to a positive ordinal by adding the total
+ number of images for this role. Passing -1, for example, selects the most
+ recently created image. Alternatively, an AMI ID, e.g. 'ami-4dcced7d' can be
+ passed in as well.""" ) )
+
+
+class DeleteImageCommand( ImageReferenceCommand, BoxCommand ):
+ long_image_option = '--image'
+ short_image_option = '-i'
+
+ def __init__( self, application ):
+ super( DeleteImageCommand, self ).__init__( application )
+ self.begin_mutex( )
+ self.option( '--keep-snapshot', '-K',
+ default=False, action='store_true',
+ help=heredoc( """Do not delete the EBS volume snapshot associated with the
+ given image. This will leave an orphaned snapshot which should be removed at
+ a later time using the 'cgcloud cleanup' command.""" ) )
+ self.option( '--quick', '-Q', default=False, action='store_true',
+                     help=heredoc( """Exit immediately after the deregistration request has been made,
+ don't wait until the image is deregistered. Implies --keep-snapshot.""" ) )
+ self.end_mutex( )
+
+ def run_on_box( self, options, box ):
+ box.delete_image( options.image,
+ wait=not options.quick,
+ delete_snapshot=not options.keep_snapshot )
+
+
+class RecreateCommand( ImageReferenceCommand, CreationCommand ):
+ """
+ Recreate a box from an image that was taken from an earlier incarnation of the box
+ """
+ long_image_option = '--boot-image'
+ short_image_option = '-i'
+
+ def __init__( self, application ):
+ super( RecreateCommand, self ).__init__( application )
+ self.option( '--quick', '-Q', default=False, action='store_true',
+ help=heredoc( """Don't wait for the box to become running or reachable via
+ SSH. If the agent is disabled in the boot image (this is uncommon,
+ see the --no-agent option to the 'create' command), no additional SSH
+ keypairs will be deployed.""" ) )
+
+ def preparation_kwargs( self, options, box ):
+ return dict( super( RecreateCommand, self ).preparation_kwargs( options, box ),
+ image_ref=options.boot_image )
+
+ def creation_kwargs( self, options, box ):
+ return dict( super( RecreateCommand, self ).creation_kwargs( options, box ),
+ wait_ready=not options.quick )
+
+ def run_on_creation( self, box, options ):
+ pass
+
+
+class CreateCommand( CreationCommand ):
+ """
+ Create a box performing the specified role, install an OS and additional packages on it and
+ optionally create an AMI image of it.
+ """
+
+ def __init__( self, application ):
+ super( CreateCommand, self ).__init__( application )
+ self.option( '--boot-image', '-i', metavar='AMI_ID',
+ help=heredoc( """The AMI ID of the image from which to create the box. This
+ argument is optional and the default is determined automatically based on
+ the role. Typically, this option does not need to be used.""" ) )
+ self.option( '--no-agent',
+ default=False, action='store_true',
+ help=heredoc( """Don't install the cghub-cloud-agent package on the box. One
+ note-worthy effect of using this option this is that the SSH keys will be
+                        noteworthy effect of using this option is that the SSH keys will be
+ self.option( '--create-image', '-I',
+ default=False, action='store_true',
+ help='Create an image of the box as soon as setup completes.' )
+        # FIXME: Take a second look at this: Does it work? Is it necessary?
+ self.option( '--upgrade', '-U',
+ default=False, action='store_true',
+ help=heredoc( """Bring the package repository as well as any installed
+ packages up to date, i.e. do what on Ubuntu is achieved by doing 'sudo
+ apt-get update ; sudo apt-get upgrade'.""" ) )
+
+ def preparation_kwargs( self, options, box ):
+ return dict( super( CreateCommand, self ).preparation_kwargs( options, box ),
+ image_ref=options.boot_image,
+ enable_agent=not options.no_agent )
+
+ def run_on_creation( self, box, options ):
+ box.setup( upgrade_installed_packages=options.upgrade )
+ if options.create_image:
+ box.stop( )
+ box.image( )
+ if options.terminate is not True:
+ box.start( )
+
+
+class ListOptionsCommand( RoleCommand ):
+ def run_on_role( self, options, ctx, role ):
+ role_options = role.get_role_options( )
+ if role_options:
+ for option in role_options:
+ print( "{name}: {help}".format( **option.to_dict( ) ) )
+ else:
+ print( 'The role %s does not define any options' % role.role( ) )
+
+
+class CleanupCommand( ContextCommand ):
+ """
+ Lists and optionally deletes unused AWS resources after prompting for confirmation.
+ """
+
+ def run_in_ctx( self, options, ctx ):
+ self.cleanup_image_snapshots( ctx )
+ self.cleanup_ssh_pubkeys( ctx )
+
+ @staticmethod
+ def cleanup_ssh_pubkeys( ctx ):
+ unused_fingerprints = ctx.unused_fingerprints( )
+ if unused_fingerprints:
+ print( 'The following public keys in S3 are not referenced by any EC2 keypairs:' )
+ for fingerprint in unused_fingerprints:
+ print( fingerprint )
+ if 'yes' == prompt( 'Delete these public keys from S3? (yes/no)', default='no' ):
+ ctx.delete_fingerprints( unused_fingerprints )
+ else:
+ print( 'No orphaned public keys in S3.' )
+
+ @staticmethod
+ def cleanup_image_snapshots( ctx ):
+ unused_snapshots = ctx.unused_snapshots( )
+ if unused_snapshots:
+ print( 'The following snapshots are not referenced by any images:' )
+ for snapshot_id in unused_snapshots:
+ print( snapshot_id )
+ if 'yes' == prompt( 'Delete these snapshots? (yes/no)', default='no' ):
+ ctx.delete_snapshots( unused_snapshots )
+ else:
+ print( 'No unused EBS volume snapshots in EC2.' )
+
+
+class ResetSecurityCommand( ContextCommand ):
+ """
+ Delete security-related objects like IAM instance profiles or EC2 security groups in a
+ namespace and its children.
+ """
+
+ def run_in_ctx( self, options, ctx ):
+ message = ("Do you really want to delete all IAM instance profiles, IAM roles and EC2 "
+ "security groups in namespace %s and its children? Although these resources "
+ "will be created on-the-fly for newly created boxes, existing boxes will "
+ "likely be impacted negatively." % ctx.namespace)
+ if 'yes' == prompt( message + ' (yes/no)', default='no' ):
+ ctx.reset_namespace_security( )
+
+
+class UpdateInstanceProfile( InstanceCommand ):
+ """
+ Update the instance profile and associated IAM roles for a given role.
+
+ This command ensures that a box of this role has accurate and up-to-date privileges to
+ interact with AWS resources. The instance profile is updated whenever a box is created. Use
+ this command to update the instance profile for existing boxes.
+ """
+
+ def run_on_instance( self, options, box ):
+ box.get_instance_profile_arn( )
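
The -O/--option flag above relies on argparse running a type callable on every occurrence and appending the results. The same pattern can be reproduced in isolation; the parser and option values below are made up and only meant to illustrate the mechanism:

    import argparse
    import re

    option_name_re = re.compile( r'^[A-Za-z][0-9A-Za-z_]*$' )

    def name_value( s ):
        # Split NAME=VALUE and validate the name, mirroring the 'option' helper above.
        parts = s.split( '=', 1 )
        if len( parts ) != 2 or not option_name_re.match( parts[ 0 ] ):
            raise argparse.ArgumentTypeError( "'%s' is not of the form NAME=VALUE" % s )
        return tuple( parts )

    parser = argparse.ArgumentParser( )
    parser.add_argument( '--option', '-O', type=name_value, action='append', default=[ ] )
    args = parser.parse_args( [ '-O', 'foo=1', '-O', 'bar=baz' ] )
    assert dict( args.option ) == { 'foo': '1', 'bar': 'baz' }
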
diff --git a/core/src/cgcloud/core/common_iam_policies.py b/core/src/cgcloud/core/common_iam_policies.py
new file mode 100644
index 0000000..b4349f2
--- /dev/null
+++ b/core/src/cgcloud/core/common_iam_policies.py
@@ -0,0 +1,23 @@
+ec2_read_only_policy = dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="*", Action="ec2:Describe*" ),
+ dict( Effect="Allow", Resource="*", Action="autoscaling:Describe*" ),
+ dict( Effect="Allow", Resource="*", Action="elasticloadbalancing:Describe*" ),
+ dict( Effect="Allow", Resource="*", Action=[
+ "cloudwatch:ListMetrics",
+ "cloudwatch:GetMetricStatistics",
+ "cloudwatch:Describe*" ] ) ] )
+
+s3_read_only_policy = dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="*", Action=[ "s3:Get*", "s3:List*" ] ) ] )
+
+iam_read_only_policy = dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="*", Action=[ "iam:List*", "iam:Get*" ] ) ] )
+
+ec2_full_policy = dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="*", Action="ec2:*" ) ] )
+
+s3_full_policy = dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="*", Action="s3:*" ) ] )
+
+sdb_full_policy = dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="*", Action="sdb:*" ) ] )
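
These policy documents are plain Python dictionaries; before one can be attached to an IAM role it has to be serialized to the JSON document format IAM expects. A minimal sketch of that step, leaving the actual upload out:

    import json

    ec2_read_only_policy = dict( Version="2012-10-17", Statement=[
        dict( Effect="Allow", Resource="*", Action="ec2:Describe*" ) ] )

    # IAM consumes the policy as a JSON document, so the dict is serialized first.
    policy_json = json.dumps( ec2_read_only_policy, indent=2 )
    assert json.loads( policy_json )[ 'Statement' ][ 0 ][ 'Effect' ] == 'Allow'
    print( policy_json )
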
diff --git a/core/src/cgcloud/core/deprecated.py b/core/src/cgcloud/core/deprecated.py
new file mode 100644
index 0000000..e6bf575
--- /dev/null
+++ b/core/src/cgcloud/core/deprecated.py
@@ -0,0 +1,8 @@
+def deprecated( artifact ):
+ # TODO: print a warning when deprecated class or function is used
+ artifact.__cgcloud_core_deprecated__ = True
+ return artifact
+
+
+def is_deprecated( artifact ):
+    return getattr( artifact, '__cgcloud_core_deprecated__', False )
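
A tiny usage example for the decorator, assuming the cgcloud-core package from this diff is importable; the two box classes are made up:

    from cgcloud.core.deprecated import deprecated, is_deprecated

    @deprecated
    class OldBox( object ):
        pass

    class NewBox( object ):
        pass

    assert is_deprecated( OldBox )
    assert not is_deprecated( NewBox )
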
diff --git a/core/src/cgcloud/core/docker_box.py b/core/src/cgcloud/core/docker_box.py
new file mode 100644
index 0000000..5ccefa0
--- /dev/null
+++ b/core/src/cgcloud/core/docker_box.py
@@ -0,0 +1,117 @@
+import logging
+from pipes import quote
+
+from fabric.operations import run
+
+from bd2k.util.strings import interpolate as fmt
+
+from cgcloud.core.box import fabric_task
+from cgcloud.core.ubuntu_box import UbuntuBox
+from cgcloud.fabric.operations import sudo
+from cgcloud.lib.util import heredoc
+
+log = logging.getLogger( __name__ )
+
+
+class DockerBox( UbuntuBox ):
+ """
+ A mixin for Docker. Based on the official shell script from
+
+ https://docs.docker.com/installation/ubuntulinux/#installation
+ """
+
+ @fabric_task
+ def _setup_package_repos( self ):
+ assert run( 'test -e /usr/lib/apt/methods/https', warn_only=True ).succeeded, \
+ "Need HTTPS support in apt-get in order to install from the Docker repository"
+ super( DockerBox, self )._setup_package_repos( )
+ sudo( ' '.join( [ 'apt-key', 'adv',
+ '--keyserver', 'hkp://p80.pool.sks-keyservers.net:80',
+ '--recv-keys', '58118E89F3A912897C070ADBF76221572C52609D' ] ) )
+ codename = self.release( ).codename
+ sudo( fmt( 'echo deb https://apt.dockerproject.org/repo ubuntu-{codename} main '
+ '> /etc/apt/sources.list.d/docker.list' ) )
+
+ @fabric_task
+ def _list_packages_to_install( self ):
+ kernel = run( 'uname -r' )
+ kernel_version = tuple( map( int, kernel.split( '.' )[ :2 ] ) )
+ assert kernel_version >= (3, 10), \
+ "Need at least kernel version 3.10, found '%s'." % kernel
+ kernel = run( 'uname -r' )
+ assert kernel.endswith( '-generic' ), \
+ 'Current kernel is not supported by the linux-image-extra-virtual package.'
+ packages = super( DockerBox, self )._list_packages_to_install( )
+ packages += [ 'docker-engine=1.9.1-0~trusty', 'linux-image-extra-' + kernel, 'linux-image-extra-virtual' ]
+ if run( 'cat /sys/module/apparmor/parameters/enabled' ).lower( ).startswith( 'y' ):
+ packages += [ 'apparmor' ]
+ return packages
+
+ def _post_install_packages( self ):
+ super( DockerBox, self )._post_install_packages( )
+ self._setup_docker( )
+
+ def _docker_users( self ):
+ return [ self.admin_account( ) ]
+
+ def _docker_data_prefixes( self ):
+ return [ self._ephemeral_mount_point( 0 ) ]
+
+ @fabric_task
+ def _setup_docker( self ):
+ for docker_user in set( self._docker_users( ) ):
+ sudo( "usermod -aG docker " + docker_user )
+ prefixes = self._docker_data_prefixes( )
+ if prefixes:
+ prefixes = ' '.join( map( quote, prefixes ) )
+ self._run_init_script( 'docker', 'stop' )
+ # Make sure Docker's aufs backend isn't mounted anymore
+ sudo( 'umount /var/lib/docker/aufs', warn_only=True )
+ # Backup initial state of data directory so we can initialize an empty ephemeral volume
+ sudo( 'tar -czC /var/lib docker > /var/lib/docker.tar.gz' )
+ # Then delete it and recreate it as an empty directory to serve as the bind mount point
+ sudo( 'rm -rf /var/lib/docker && mkdir /var/lib/docker' )
+ self._register_init_script(
+ 'dockerbox',
+ heredoc( """
+ description "Placement of /var/lib/docker"
+ console log
+ start on starting docker
+ stop on stopped docker
+ pre-start script
+ echo
+ echo "This is the dockerbox pre-start script"
+ set -ex
+ if mountpoint -q /var/lib/docker; then
+ echo "The directory '/var/lib/docker' is already mounted, exiting."
+ else
+ for prefix in {prefixes}; do
+ # Prefix must refer to a separate volume, e.g. ephemeral or EBS
+ if mountpoint -q "$prefix"; then
+ # Make sure Docker's aufs backend isn't mounted anymore
+ umount /var/lib/docker/aufs || true
+ if test -d "$prefix/var/lib/docker"; then
+ echo "The directory '$prefix/var/lib/docker' already exists, using it."
+ else
+ mkdir -p "$prefix/var/lib"
+ # If /var/lib/docker contains files ...
+ if python -c 'import os, sys; sys.exit( 0 if os.listdir( sys.argv[1] ) else 1 )' /var/lib/docker; then
+ # ... move it to prefix ...
+ mv /var/lib/docker "$prefix/var/lib"
+ # ... and recreate it as an empty mount point, ...
+ mkdir -p /var/lib/docker
+ else
+ # ... otherwise untar the initial backup.
+ tar -xzC "$prefix/var/lib" < /var/lib/docker.tar.gz
+ fi
+ fi
+ # Now bind-mount into /var/lib/docker
+ mount --bind "$prefix/var/lib/docker" /var/lib/docker
+ break
+ else
+ echo "The prefix directory '$prefix' is not a mount point, skipping."
+ fi
+ done
+ fi
+ end script""" ) )
+ self._run_init_script( 'docker', 'start' )
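
The _docker_users() and _docker_data_prefixes() hooks are meant to be overridden by concrete roles that mix DockerBox in. A hypothetical subclass sketch; the role name, account name and EBS mount point are made up and not part of this diff:

    from cgcloud.core.docker_box import DockerBox

    class MyDockerBox( DockerBox ):
        # Hypothetical role that also grants a second application account access to
        # Docker and prefers an EBS mount over the first ephemeral volume.

        def _docker_users( self ):
            return super( MyDockerBox, self )._docker_users( ) + [ 'appuser' ]

        def _docker_data_prefixes( self ):
            # Prefixes are tried in order by the dockerbox pre-start script above.
            return [ '/mnt/ebs' ] + super( MyDockerBox, self )._docker_data_prefixes( )
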
diff --git a/core/src/cgcloud/core/fedora_box.py b/core/src/cgcloud/core/fedora_box.py
new file mode 100644
index 0000000..394af99
--- /dev/null
+++ b/core/src/cgcloud/core/fedora_box.py
@@ -0,0 +1,76 @@
+from abc import abstractmethod
+import re
+from operator import attrgetter
+
+from fabric.operations import sudo
+
+from cgcloud.core.box import fabric_task
+from cgcloud.core.agent_box import AgentBox
+from cgcloud.core.cloud_init_box import CloudInitBox
+from cgcloud.core.rc_local_box import RcLocalBox
+from cgcloud.core.yum_box import YumBox
+
+
+class FedoraBox( YumBox, AgentBox, CloudInitBox, RcLocalBox ):
+ """
+    A box that boots off an official Fedora cloud AMI
+ """
+
+ @abstractmethod
+ def release( self ):
+ """
+ :return: the version number of the Fedora release, e.g. 17
+ :rtype: int
+ """
+ raise NotImplementedError
+
+ def admin_account( self ):
+ return "fedora" if self.release( ) >= 19 else "ec2-user"
+
+ def _base_image( self, virtualization_type ):
+ release = self.release( )
+ name = None
+ if release < 21:
+ name = 'Fedora-x86_64-%i-*' % release
+ elif release == 21:
+ name = 'Fedora-Cloud-Base-*-21.x86_64-*'
+ else:
+ name = 'Fedora-Cloud-Base-%s-*.x86_64-*' % release
+ images = self.ctx.ec2.get_all_images(
+ owners=[ '125523088429' ],
+ filters={
+ 'name': name,
+ 'root-device-type': 'ebs',
+ 'virtualization-type': virtualization_type } )
+ images = [ i for i in images if not re.search( 'Alpha|Beta', i.name ) ]
+ if not images:
+ raise self.NoSuchImageException(
+ "Can't find any AMIs for Fedora %i and virtualization type %s" % (
+ release, virtualization_type ) )
+ images.sort( key=attrgetter( 'name' ), reverse=True )
+ if False:
+ if len( images ) > 1:
+ raise RuntimeError(
+ "Found more than one AMI for Fedora %i and virtualization type %s" % (
+ release, virtualization_type ) )
+ return images[0]
+
+ def _list_packages_to_install( self ):
+ return super( FedoraBox, self )._list_packages_to_install( ) + [
+ 'redhat-lsb' # gets us lsb_release
+ ]
+
+ def _get_package_substitutions( self ):
+ return super( FedoraBox, self )._get_package_substitutions( ) + [
+ # Without openssl-devel, the httplib module disables HTTPS support. The underlying
+ # 'import _ssl' fails with ImportError: /usr/lib64/python2.7/lib-dynload/_ssl.so:
+ # symbol SSLeay_version, version OPENSSL_1.0.1 not defined in file libcrypto.so.10
+            # with link time reference. This package substitution ensures that if Python is to
+            # be installed, openssl-devel is too.
+ ( 'python', ( 'python', 'openssl-devel' ) )
+ ]
+
+ @fabric_task
+ def _get_rc_local_path( self ):
+ rc_local_path = '/etc/rc.d/rc.local'
+ sudo( 'test -f {f} || echo "#!/bin/sh" > {f} && chmod +x {f}'.format( f=rc_local_path ) )
+ return rc_local_path
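
FedoraBox._base_image() above picks the newest matching AMI simply by filtering out pre-releases and sorting on the image name in reverse. A small standalone illustration of that selection logic, using made-up image names and a stand-in for the boto Image class:

    import re
    from operator import attrgetter

    class FakeImage( object ):
        # Stand-in for a boto Image object; only the 'name' attribute matters here.
        def __init__( self, name ):
            self.name = name

    images = [ FakeImage( 'Fedora-Cloud-Base-22-20150521.x86_64-hvm' ),
               FakeImage( 'Fedora-Cloud-Base-22-20150210.x86_64-hvm' ),
               FakeImage( 'Fedora-Cloud-Base-22-Beta-20150415.x86_64-hvm' ) ]
    images = [ i for i in images if not re.search( 'Alpha|Beta', i.name ) ]
    images.sort( key=attrgetter( 'name' ), reverse=True )
    assert images[ 0 ].name.endswith( '20150521.x86_64-hvm' )
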
diff --git a/core/src/cgcloud/core/generic_boxes.py b/core/src/cgcloud/core/generic_boxes.py
new file mode 100644
index 0000000..d707285
--- /dev/null
+++ b/core/src/cgcloud/core/generic_boxes.py
@@ -0,0 +1,303 @@
+from urlparse import urlparse
+
+from fabric.operations import run, sudo, os
+
+from cgcloud.core.deprecated import deprecated
+from cgcloud.core.box import fabric_task
+from cgcloud.core.centos_box import CentosBox
+from cgcloud.core.fedora_box import FedoraBox
+from cgcloud.core.ubuntu_box import UpstartUbuntuBox, SystemdUbuntuBox
+
+
+@deprecated
+class GenericCentos5Box( CentosBox ):
+ """
+ Good ole CentOS 5 from 1995, more or less
+ """
+
+ def release( self ):
+ return '5.8'
+
+ @classmethod
+ def recommended_instance_type( cls ):
+ # On t1.micro, the agent installation runs out of memory
+ return "m1.small"
+
+ @classmethod
+ def supported_virtualization_types( cls ):
+ return [ 'paravirtual' ]
+
+ def __update_sudo( self ):
+ """
+ 5.8 has sudo 1.7.2p1 whose -i switch is horribly broken. For example,
+
+ sudo -u jenkins -i bash -c 'echo bla >> ~/foo'
+
+ doesn't work as expected. In sudo 1.8.7, it does. We do need sudo -i in some of the
+ subclasses (see cghub.fabric.operations for how we hack -i into Fabric 1.7.x) and so we
+ install a newer version of the sudo rpm from the sudo maintainer.
+
+        This method should be invoked early on during setup.
+ """
+ self._yum_local( is_update=True, rpm_urls=[
+ 'ftp://ftp.sudo.ws/pub/sudo/packages/Centos/5/sudo-1.8.14-4.el5.x86_64.rpm' ] )
+
+ def _on_instance_ready( self, first_boot ):
+ super( GenericCentos5Box, self )._on_instance_ready( first_boot )
+ if self.generation == 0 and first_boot:
+ self.__update_sudo( )
+ if False:
+ self._update_openssh( )
+
+ def _ephemeral_mount_point( self, i ):
+ return "/mnt" if i == 0 else None
+
+ # FIXME: These two methods assume that this class is derived from AgentBox.
+
+ def _get_package_substitutions( self ):
+ return super( GenericCentos5Box, self )._get_package_substitutions( ) + [
+ ('python', 'python26'),
+ ('python-devel', 'python26-devel')
+ ]
+
+ def _post_install_packages( self ):
+ if 'python' in self._list_packages_to_install( ):
+ self.__update_python( )
+ super( GenericCentos5Box, self )._post_install_packages( )
+
+ @fabric_task
+ def __update_python( self ):
+ # The pip from the python-pip package is hard-wired to the python 2.4 from the python
+ # package. Also it's ancient, fossilized crap. To get an up-to-date pip that is
+ # wired to python 2.6 from the python26 package we have to jump though some hoops.
+ # First, we need to ignore certs since the CA package on CentOS 5 is, you guessed it,
+ # out of date. We do this globally because the downloaded .py scripts execute wget
+ # internally. Nevertheless, we got cert errors with github.com and so we are using
+ # curl instead to download the scripts from there.
+ sudo( 'echo "check_certificate=off" > /root/.wgetrc' )
+ # Then install setuptools ...
+ run( 'curl -O https://bitbucket.org/pypa/setuptools/raw/bootstrap/ez_setup.py' )
+ sudo( 'python2.6 ez_setup.py' )
+ # .. and pip.
+ run( 'curl -O https://raw.githubusercontent.com/pypa/pip/master/contrib/get-pip.py' )
+ sudo( 'python2.6 get-pip.py' )
+ sudo( 'rm /root/.wgetrc' )
+
+
+class GenericCentos6Box( CentosBox ):
+ """
+ Generic box with Centos 6.4
+ """
+
+ def release( self ):
+ return '6.4'
+
+ def _ephemeral_mount_point( self, i ):
+ return "/mnt/ephemeral" if i == 0 else None
+
+ def _on_instance_ready( self, first_boot ):
+ super( GenericCentos6Box, self )._on_instance_ready( first_boot )
+ if self.generation == 0 and first_boot:
+ if False:
+ self._update_openssh( )
+
+
+@deprecated
+class GenericUbuntuLucidBox( UpstartUbuntuBox ):
+ def release( self ):
+ return self.Release( codename='lucid', version='10.04' )
+
+ @classmethod
+ def supported_virtualization_types( cls ):
+ return [ 'paravirtual' ]
+
+ def _get_virtual_block_device_prefix( self ):
+ return "/dev/sd"
+
+ @fabric_task
+ def __update_sudo( self ):
+ """
+ See GenericCentos5Box
+ """
+ url = 'ftp://ftp.sudo.ws/pub/sudo/packages/Ubuntu/10.04/sudo_1.8.14-4_amd64.deb'
+ package = os.path.basename( urlparse( url ).path )
+ run( 'wget ' + url )
+ sudo( 'sudo dpkg --force-confold -i ' + package )
+ run( 'rm ' + package )
+
+ def _on_instance_ready( self, first_boot ):
+ super( GenericUbuntuLucidBox, self )._on_instance_ready( first_boot )
+ if self.generation == 0 and first_boot:
+ self.__update_sudo( )
+
+ def _get_package_substitutions( self ):
+ return super( GenericUbuntuLucidBox, self )._get_package_substitutions( ) + [
+ ('git', 'git-core') ]
+
+
+@deprecated
+class GenericUbuntuMaverickBox( UpstartUbuntuBox ):
+ def release( self ):
+ return self.Release( codename='maverick', version='10.10' )
+
+ @classmethod
+ def supported_virtualization_types( cls ):
+ return [ 'paravirtual' ]
+
+
+@deprecated
+class GenericUbuntuNattyBox( UpstartUbuntuBox ):
+ def release( self ):
+ return self.Release( codename='natty', version='11.04' )
+
+ @classmethod
+ def supported_virtualization_types( cls ):
+ return [ 'paravirtual' ]
+
+
+@deprecated
+class GenericUbuntuOneiricBox( UpstartUbuntuBox ):
+ def release( self ):
+ return self.Release( codename='oneiric', version='11.10' )
+
+ @classmethod
+ def supported_virtualization_types( cls ):
+ return [ 'paravirtual' ]
+
+
+class GenericUbuntuPreciseBox( UpstartUbuntuBox ):
+ """
+ Generic box with Ubuntu 12.04 LTS (EOL April 2017)
+ """
+ def release( self ):
+ return self.Release( codename='precise', version='12.04' )
+
+
+@deprecated
+class GenericUbuntuQuantalBox( UpstartUbuntuBox ):
+ def release( self ):
+ return self.Release( codename='quantal', version='12.10' )
+
+
+@deprecated
+class GenericUbuntuRaringBox( UpstartUbuntuBox ):
+ def release( self ):
+ return self.Release( codename='raring', version='13.04' )
+
+
+@deprecated
+class GenericUbuntuSaucyBox( UpstartUbuntuBox ):
+ def release( self ):
+ return self.Release( codename='saucy', version='13.10' )
+
+
+class GenericUbuntuTrustyBox( UpstartUbuntuBox ):
+ """
+ Generic box with Ubuntu 14.04 LTS (EOL April 2019)
+ """
+ def release( self ):
+ return self.Release( codename='trusty', version='14.04' )
+
+
+@deprecated
+class GenericUbuntuUtopicBox( UpstartUbuntuBox ):
+ def release( self ):
+ return self.Release( codename='utopic', version='14.10' )
+
+
+class GenericUbuntuVividBox( SystemdUbuntuBox ):
+ """
+ Generic box with Ubuntu 15.04 (EOL February 4, 2016)
+ """
+ def release( self ):
+ return self.Release( codename='vivid', version='15.04' )
+
+
+@deprecated
+class GenericFedora17Box( FedoraBox ):
+ """
+ This one doesn't work since the AMI was deleted by the Fedora guys
+ """
+
+ def release( self ):
+ return 17
+
+
+@deprecated
+class GenericFedora18Box( FedoraBox ):
+ """
+ This one doesn't work since the AMI was deleted by the Fedora guys
+ """
+
+ def release( self ):
+ return 18
+
+
+@deprecated
+class GenericFedora19Box( FedoraBox ):
+ def release( self ):
+ return 19
+
+ @classmethod
+ def recommended_instance_type( cls ):
+ # On t1.micro, the agent installation runs out of memory
+ return "m1.small"
+
+ @classmethod
+ def supported_virtualization_types( cls ):
+ return [ 'paravirtual' ]
+
+
+@deprecated
+class GenericFedora20Box( FedoraBox ):
+ def release( self ):
+ return 20
+
+ @classmethod
+ def recommended_instance_type( cls ):
+ # On t1.micro, the agent installation runs out of memory
+ return "m1.small"
+
+ @classmethod
+ def supported_virtualization_types( cls ):
+ return [ 'paravirtual' ]
+
+ # FIXME: Consider pulling this up
+
+ def _populate_cloud_config( self, instance_type, user_data ):
+ super( GenericFedora20Box, self )._populate_cloud_config( instance_type, user_data )
+ user_data[ 'bootcmd' ][ 0:0 ] = [
+ self._get_package_installation_command( 'yum-plugin-fastestmirror' ),
+ [ 'yum', 'clean', 'all' ] ]
+
+
+class GenericFedora21Box( FedoraBox ):
+ """
+ Generic box with Fedora 21
+ """
+ def release( self ):
+ return 21
+
+
+class GenericFedora22Box( FedoraBox ):
+ """
+ Generic box with Fedora 22
+ """
+ def release( self ):
+ return 22
+
+ def _on_instance_ready( self, first_boot ):
+ if first_boot:
+ self.__fix_stupid_locale_problem( )
+ super( GenericFedora22Box, self )._on_instance_ready( first_boot )
+
+ @fabric_task
+ def __fix_stupid_locale_problem( self ):
+ """
+ The bug:
+ https://bugzilla.redhat.com/show_bug.cgi?id=1261249
+
+ The workaround:
+ https://www.banym.de/linux/fedora/problems-with-missing-locale-files-on-fedora-20-made-libvirtd-service-not-starting
+ """
+ sudo( 'localedef -c -i en_US -f UTF-8 en_US.UTF-8' )
diff --git a/core/src/cgcloud/core/init_box.py b/core/src/cgcloud/core/init_box.py
new file mode 100644
index 0000000..278500c
--- /dev/null
+++ b/core/src/cgcloud/core/init_box.py
@@ -0,0 +1,76 @@
+from StringIO import StringIO
+from abc import abstractmethod
+
+from fabric.operations import sudo, put
+
+from cgcloud.core.box import Box, fabric_task
+
+
+class AbstractInitBox( Box ):
+ @abstractmethod
+ def _register_init_script( self, name, script ):
+ raise NotImplementedError( )
+
+ @abstractmethod
+ def _run_init_script( self, name, command='start' ):
+ raise NotImplementedError( )
+
+
+class UpstartBox( AbstractInitBox ):
+ """
+ A box that uses Ubuntu's upstart
+ """
+
+ @fabric_task
+ def _register_init_script( self, name, script ):
+ path = '/etc/init/%s.conf' % name
+ put( local_path=StringIO( script ), remote_path=path, use_sudo=True )
+ sudo( "chown root:root '%s'" % path )
+
+ @fabric_task
+ def _run_init_script( self, name, command='start' ):
+ sudo( "service %s %s" % ( name, command ) )
+
+
+class SysvInitdBox( AbstractInitBox ):
+ """
+ A box that supports SysV-style init scripts. This is more or less a kitchen sink of
+ functionality that seems to work on CentOS and Fedora.
+ """
+
+ @staticmethod
+ def _init_script_path( name ):
+ return '/etc/init.d/%s' % name
+
+ @fabric_task
+ def _register_init_script( self, name, script ):
+ script_path = self._init_script_path( name )
+ put(
+ local_path=StringIO( script ),
+ remote_path=script_path,
+ mode=0755,
+ use_sudo=True )
+ sudo( "chown root:root '%s'" % script_path )
+ sudo( 'sudo chkconfig --add %s' % name )
+
+ @fabric_task
+ def _run_init_script( self, name, command='start' ):
+ sudo( "service %s %s" % ( name, command ) )
+
+
+class SystemdBox( AbstractInitBox ):
+ """
+    A box that supports systemd, which hopefully will supersede all other init systems for Linux.
+ I don't care which *expletive* init system they settle on as long as they stop reinventing
+ the wheel with a different number of corners.
+ """
+
+ @fabric_task
+ def _register_init_script( self, name, script ):
+ path = '/lib/systemd/system/%s.service' % name
+ put( local_path=StringIO( script ), remote_path=path, use_sudo=True )
+ sudo( "chown root:root '%s'" % path )
+
+ @fabric_task
+ def _run_init_script( self, name, command='start' ):
+ sudo( 'systemctl %s %s' % ( command, name ) )
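
All three init mixins share the same small contract: _register_init_script() uploads the script or unit file and _run_init_script() drives it via the distribution's service tool. A hedged sketch of a box that uses the systemd flavour to install a one-shot unit; the service name and unit content are made up:

    from cgcloud.core.init_box import SystemdBox

    class HelloBox( SystemdBox ):
        # Hypothetical box that registers and starts a trivial 'hello' service during setup.

        def _setup_hello_service( self ):
            unit = '\n'.join( [
                '[Unit]',
                'Description=Say hello once at boot',
                '',
                '[Service]',
                'Type=oneshot',
                'ExecStart=/bin/echo hello',
                '',
                '[Install]',
                'WantedBy=multi-user.target' ] )
            self._register_init_script( 'hello', unit )
            self._run_init_script( 'hello', 'start' )
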
diff --git a/core/src/cgcloud/core/mesos_box.py b/core/src/cgcloud/core/mesos_box.py
new file mode 100644
index 0000000..cb6782b
--- /dev/null
+++ b/core/src/cgcloud/core/mesos_box.py
@@ -0,0 +1,45 @@
+from fabric.operations import run
+from bd2k.util.strings import interpolate as fmt
+
+from cgcloud.core.box import fabric_task
+from cgcloud.core.ubuntu_box import UbuntuBox
+from cgcloud.fabric.operations import sudo, pip
+
+
+class MesosBox( UbuntuBox ):
+ """
+ A mixin for getting Mesos installed from Mesosphere's Debian repository
+ """
+
+ def _mesos_version( self ):
+ return '0.25.1'
+
+ def _mesos_egg_version( self ):
+ return '0.25.0'
+
+ @fabric_task
+ def _setup_package_repos( self ):
+ super( MesosBox, self )._setup_package_repos( )
+ sudo( 'apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF' )
+ codename = self.release( ).codename
+ sudo( fmt( 'echo "deb http://repos.mesosphere.io/ubuntu {codename} main" '
+ '> /etc/apt/sources.list.d/mesosphere.list' ) )
+
+ def _list_packages_to_install( self ):
+ return super( MesosBox, self )._list_packages_to_install( ) + [
+ 'python2.7',
+ 'mesos=' + self._mesos_version( ) + '-*' ]
+
+ def _post_install_packages( self ):
+ super( MesosBox, self )._post_install_packages( )
+ self.__install_mesos_egg( )
+
+ @fabric_task
+ def __install_mesos_egg( self ):
+ egg = 'mesos-' + self._mesos_egg_version( ) + '-py2.7-linux-x86_64.egg'
+ version = self.release( ).version
+ run( fmt( 'wget http://downloads.mesosphere.io/master/ubuntu/{version}/{egg}' ) )
+ # We need a newer version of protobuf than what comes default on Ubuntu
+ pip( 'install --upgrade protobuf', use_sudo=True )
+ sudo( 'easy_install -a ' + egg )
+ run( 'rm ' + egg )
diff --git a/core/src/cgcloud/core/package_manager_box.py b/core/src/cgcloud/core/package_manager_box.py
new file mode 100644
index 0000000..e2347aa
--- /dev/null
+++ b/core/src/cgcloud/core/package_manager_box.py
@@ -0,0 +1,166 @@
+from abc import abstractmethod
+from itertools import chain
+
+from cgcloud.core.box import Box
+
+
+class PackageManagerBox( Box ):
+ """
+ A box that uses a package manager like apt-get or yum.
+ """
+
+ @abstractmethod
+ def _sync_package_repos( self ):
+ """
+ Update the cached package descriptions from remote package repositories,
+ e.g. apt-get update on Ubuntu
+ """
+ raise NotImplementedError( )
+
+ @abstractmethod
+ def _upgrade_installed_packages( self ):
+ """
+        Update all installed packages to their latest version, e.g. apt-get upgrade on Ubuntu.
+ """
+ raise NotImplementedError( )
+
+ @abstractmethod
+ def _install_packages( self, packages ):
+ """
+ Install the given packages
+
+ :param packages: A list of package names
+ """
+ raise NotImplementedError( )
+
+ def _setup_package_repos( self ):
+ """
+ Set up additional remote package repositories.
+ """
+ pass
+
+ def _list_packages_to_install( self ):
+ """
+ Return the list of packages to be installed.
+ """
+ return [ 'htop' ]
+
+ def _pre_install_packages( self ):
+ """
+ Invoked immediately before package installation.
+ """
+ pass
+
+ def _post_install_packages( self ):
+ """
+ Invoked immediately after package installation.
+ """
+ pass
+
+ def _get_package_substitutions( self ):
+ """
+ Return a list of package substitutions. Each substitution is a tuple of two elements. The
+ first element, aka the original, is the name of a package to be installed, the second
+ element, aka the substitutes, is an iterable of names of the packages that should be used
+ instead. An empty iterable will prevent the original from being installed. If the second
+ element is an instance of basestring, it will be treated like a singleton of that string.
+        If the second element is None, it will be treated like an empty iterable. Substitutes are
+ subjected to substitution, too. The dictionary may contain cycles.
+
+ The returned list will be passed to the dict() constructor. If it contains more than one
+ tuple with the same first element, only the last entry will be significant. For example,
+ [ ('a','b'), ('a','c') ] is equivalent to [ ('a','c') ].
+ """
+ return [ ]
+
+ def setup( self, upgrade_installed_packages=False ):
+ """
+ :param upgrade_installed_packages:
+ Bring the package repository as well as any installed packages up to date, i.e. do
+ what on Ubuntu is achieved by doing 'sudo apt-get update ; sudo apt-get upgrade'.
+ """
+ self._setup_package_repos( )
+ self._sync_package_repos( )
+ self._pre_install_packages( )
+ substitutions = dict( self._get_package_substitutions( ) )
+ packages = self._list_packages_to_install( )
+ packages = list( self.__substitute_packages( substitutions, packages ) )
+ self._install_packages( packages )
+ self._post_install_packages( )
+ if upgrade_installed_packages:
+ self._upgrade_installed_packages( )
+ # The upgrade might involve a kernel update, so we'll reboot to be safe
+ self.reboot( )
+
+ @abstractmethod
+ def _ssh_service_name( self ):
+ raise NotImplementedError( )
+
+ def _substitute_package( self, package ):
+ """
+ Return the set of packages that substitute the given package on this box.
+ """
+ substitutions = dict( self._get_package_substitutions( ) )
+ return self.__substitute_packages( substitutions, [ package ] )
+
+ @classmethod
+ def __substitute_package( cls, substitutions, package, history=None ):
+ """
+ Apply the given substitutions map on the package argument. Handles cycles as well as None
+ keys and values.
+
+ >>> substitute_package = PackageManagerBox._PackageManagerBox__substitute_package
+ >>> substitute_package( {}, 'a' )
+ set(['a'])
+ >>> substitute_package( { 'a': 'a' }, 'a' )
+ set(['a'])
+ >>> substitute_package( { 'a': None }, 'a' )
+ set([])
+ >>> substitute_package( { 'a': [] }, 'a' )
+ set([])
+ >>> substitute_package( { 'a': 'b' }, 'a' )
+ set(['b'])
+ >>> substitute_package( { 'a': ['b'] }, 'a' )
+ set(['b'])
+ >>> substitute_package( { 'a': 'b' }, 'b' )
+ set(['b'])
+ >>> substitute_package( { 'a': ['b'] }, 'b' )
+ set(['b'])
+ >>> substitute_package( { 'a': 'b' }, 'a' )
+ set(['b'])
+ >>> substitute_package( { 'a': 'b', 'b':'c', 'c':'a' }, 'a' )
+ set(['a'])
+ >>> substitute_package( { 'a':['a','b'], 'b':['b','c'], 'c':['c','a'] }, 'a' ) == {'a','b','c'}
+ True
+ >>> substitute_package( { 'a':['a','b'], 'b':None }, 'a' )
+ set(['a'])
+ >>> substitute_package( { 'a':['a','b'], 'b':[] }, 'a' )
+ set(['a'])
+ >>> substitute_package( { 'a':['a','b'], 'b':'c' }, 'a' ) == {'a', 'c'}
+ True
+ """
+ if not isinstance( package, basestring ):
+ raise ValueError( "Package must be a string" )
+ if history is None:
+ history = { package }
+ else:
+ if package in history: return { package }
+ history.add( package )
+ try:
+ substitutes = substitutions[ package ]
+ except KeyError:
+ return { package }
+ if substitutes is None: return set( )
+ elif isinstance( substitutes, basestring ):
+ substitute = substitutes
+ return cls.__substitute_package( substitutions, substitute, history )
+ else:
+ return cls.__substitute_packages( substitutions, substitutes, history )
+
+ @classmethod
+ def __substitute_packages( cls, substitutions, substitutes, history=None ):
+ return set( chain.from_iterable(
+ cls.__substitute_package( substitutions, substitute, history )
+ for substitute in substitutes ) )
+
+
diff --git a/core/src/cgcloud/core/project.py b/core/src/cgcloud/core/project.py
new file mode 100644
index 0000000..b0c5c96
--- /dev/null
+++ b/core/src/cgcloud/core/project.py
@@ -0,0 +1,57 @@
+import glob
+import os
+
+import pkg_resources
+from bd2k.util.collections import rindex
+
+
+def project_artifacts( project_name ):
+ """
+ Similar to project_artifact but including dependent project artifacts
+ """
+ # FIXME: This is a bit simplistic
+ if project_name == 'lib':
+ return [ project_artifact( project_name ) ]
+ else:
+ return [ project_artifact( 'lib' ), project_artifact( project_name ) ]
+
+
+def project_artifact( project_name ):
+ """
+ Resolve the name of a sibling project to something that can be passed to pip in order to get
+ that project installed. The version of the sibling project is assumed to be identical to the
+ currently installed version of this project (cgcloud-core). If the version can't be
+ determined, a source distribution is looked up in the 'dist' subdirectory of the sibling
+ project. This is likely to be the case in development mode, i.e. if this project was
+ installed via 'setup.py develop'. If neither version nor source distribution can be
+ determined, an exception will be raised.
+
+ :param project_name: the name of a sibling project such as 'agent' or 'spark-tools'
+
+ :return: Either an absolute path to a source distribution or a requirement specifier to be
+ looked up in the Python package index (PyPI).
+ """
+ dir_path = os.path.abspath( __file__ ).split( os.path.sep )
+ try:
+ # If the 'src' directory is in the module's file path, we must be in development mode.
+ i = rindex( dir_path, 'src' )
+ except ValueError:
+ # Otherwise, we must be installed and need to determine our current version.
+ version = pkg_resources.get_distribution( 'cgcloud-core' ).version
+ return 'cgcloud-%s==%s' % (project_name, version)
+ else:
+ dir_path = os.path.sep.join( dir_path[ :i ] )
+ project_path = os.path.join( os.path.dirname( dir_path ), project_name )
+ sdist_glob = os.path.join( project_path, 'dist', 'cgcloud-%s*.tar.gz' % project_name )
+ sdist = glob.glob( sdist_glob )
+ if len( sdist ) == 1:
+ sdist = sdist[ 0 ]
+ elif sdist:
+ raise RuntimeError(
+ "Can't decide which of these is the '%s' source distribution: %s" % (
+ project_name, sdist) )
+ else:
+ raise RuntimeError( "Can't find '%s' source distribution. Looking for '%s'. You may "
+ "just need to run 'make sdist' to fix this" % (
+ project_name, sdist_glob) )
+ return sdist
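+
+
+# For illustration only (paths and versions below are assumed, not computed here): in development
+# mode, project_artifact( 'agent' ) would return an absolute sdist path ending in
+# 'agent/dist/cgcloud-agent-X.Y.Z.tar.gz', while on an installed copy it would return a requirement
+# specifier such as 'cgcloud-agent==X.Y.Z'.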
diff --git a/core/src/cgcloud/core/rc_local_box.py b/core/src/cgcloud/core/rc_local_box.py
new file mode 100644
index 0000000..e5c5155
--- /dev/null
+++ b/core/src/cgcloud/core/rc_local_box.py
@@ -0,0 +1,154 @@
+from collections import namedtuple
+from contextlib import closing
+from StringIO import StringIO
+
+import re
+from fabric.operations import get, put, sudo
+
+from cgcloud.lib.util import prepend_shell_script
+from cgcloud.core.box import fabric_task, Box
+
+InitCommand = namedtuple( "InitCommand", [ "command", "provides", "depends" ] )
+
+
+class RcLocalBox( Box ):
+ """
+ A mixin for implementing Box._register_init_command(), i.e. the ability to run an arbitrary
+ command every time a box is booted, using the rc.local mechanism that most distributions
+ provide.
+ """
+
+ def __init__( self, ctx ):
+ super( RcLocalBox, self ).__init__( ctx )
+ self._init_commands = [ ]
+
+ @fabric_task
+ def _register_init_command( self, cmd ):
+ rc_local_path = self._get_rc_local_path( )
+ self._prepend_remote_shell_script( script=cmd,
+ remote_path=rc_local_path,
+ use_sudo=True,
+ mirror_local_mode=True )
+ sudo( 'chown root:root {0} && chmod +x {0}'.format( rc_local_path ) )
+
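+ # Usage sketch (the command string is an assumed example, not taken from this module):
+ #   self._register_init_command( "mount -o remount,noatime /" )
+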
+ @fabric_task
+ def _get_rc_local_path( self ):
+ """
+ Return the canonical path to /etc/rc.local or an equivalent shell script that gets
+ executed during boot up. The last component in the path must not be a symlink,
+ other components may be.
+ """
+ # might be a symlink but prepend_remote_shell_script doesn't work with symlinks
+ return sudo( 'readlink -f /etc/rc.local' )
+
+ @fabric_task
+ def _prepend_remote_shell_script( self, script, remote_path, **put_kwargs ):
+ """
+ Insert the given script into the remote file at the given path before the first script
+ line. See prepend_shell_script() for a definition of script line.
+
+ :param script: the script to be inserted
+ :param remote_path: the path to the file on the remote host
+ :param put_kwargs: arguments passed to Fabric's put operation
+ """
+ with closing( StringIO( ) ) as out_file:
+ with closing( StringIO( ) ) as in_file:
+ get( remote_path=remote_path, local_path=in_file )
+ in_file.seek( 0 )
+ prepend_shell_script( '\n' + script, in_file, out_file )
+ out_file.seek( 0 )
+ put( remote_path=remote_path, local_path=out_file, **put_kwargs )
+
+ env_entry_re = re.compile( r'^\s*([^=\s]+)\s*=\s*"?(.*?)"?\s*$' )
+
+ @classmethod
+ def _patch_etc_environment( cls, env_file, dirs=None, dirs_var='PATH', env_pairs=None ):
+ r"""
+ Patch /etc/environment by A) adding a list of directories to a PATH or PATH-like variable
+ and/or B) adding other environment variables to it.
+
+ :param env_file: A seekable file handle to /etc/environment or a file of that format
+
+ :param list dirs: A list of directory paths to be added to the /etc/environment entry for
+ PATH, or the entry referenced by dirs_var
+
+ :param str dirs_var: The name of the variable to append `dirs` to
+
+ :param dict env_pairs: A dictionary with other environment variables to append
+
+ >>> f=StringIO( 'FOO = " BAR " \n PATH =foo:bar\nBLA="FASEL"' )
+ >>> f.seek( 0, 2 ) # seek to end as if file was opened with mode 'a'
+ >>> RcLocalBox._patch_etc_environment( f, dirs=[ "new1" ] )
+ >>> f.getvalue()
+ 'BLA="FASEL"\nFOO=" BAR "\nPATH="foo:bar:new1"\n'
+ >>> RcLocalBox._patch_etc_environment( f, dirs=[ "new2" ], dirs_var='PATH2' )
+ >>> f.getvalue()
+ 'BLA="FASEL"\nFOO=" BAR "\nPATH="foo:bar:new1"\nPATH2="new2"\n'
+ """
+
+ def parse_entry( s ):
+ m = cls.env_entry_re.match( s )
+ return m.group( 1 ), m.group( 2 )
+
+ env_file.seek( 0 )
+ env = dict( parse_entry( _ ) for _ in env_file.read( ).splitlines( ) )
+
+ # Do we have directories to add to a path?
+ if dirs is not None:
+ path = filter( None, env.get( dirs_var, '' ).split( ':' ) )
+ path.extend( dirs )
+ env[ dirs_var ] = ':'.join( path )
+
+ # Do we have other environment variables to write?
+ if env_pairs is not None:
+ for (k, v) in env_pairs.iteritems():
+ env[k] = v
+
+ env_file.seek( 0 )
+ env_file.truncate( 0 )
+ for var in sorted( env.items( ) ):
+ env_file.write( '%s="%s"\n' % var )
+
+
+# FIXME: This is here for an experimental feature (ordering commands that depend on each other)
+
+if False:
+ def toposort2( data ):
+ """
+ Dependencies are expressed as a dictionary whose keys are items and whose values are a set
+ of dependent items. Output is a list of sets in topological order. The first set consists of
+ items with no dependencies, each subsequent set consists of items that depend upon items in
+ the preceding sets.
+
+ >>> toposort2({
+ ... 2: {11},
+ ... 9: {11, 8},
+ ... 10: {11, 3},
+ ... 11: {7, 5},
+ ... 8: {7, 3},
+ ... })
+ [3, 5, 7]
+ [8, 11]
+ [2, 9, 10]
+
+ """
+
+ from functools import reduce
+
+ # Ignore self dependencies.
+ for k, v in data.items( ):
+ v.discard( k )
+ # Find all items that don't depend on anything.
+ extra_items_in_deps = reduce( set.union, data.itervalues( ) ) - set( data.iterkeys( ) )
+ # Add empty dependencies where needed
+ data.update( { item: set( ) for item in extra_items_in_deps } )
+ while True:
+ ordered = set( item for item, dep in data.iteritems( ) if not dep )
+ if not ordered:
+ break
+ yield ordered
+ data = { item: (dep - ordered)
+ for item, dep in data.iteritems( )
+ if item not in ordered }
+ assert not data, "Cyclic dependencies exist among these items:\n%s" % '\n'.join(
+ repr( x ) for x in data.iteritems( ) )
diff --git a/core/src/cgcloud/core/source_control_client.py b/core/src/cgcloud/core/source_control_client.py
new file mode 100644
index 0000000..e38ab6f
--- /dev/null
+++ b/core/src/cgcloud/core/source_control_client.py
@@ -0,0 +1,32 @@
+from fabric.operations import run
+
+from cgcloud.fabric.operations import sudo
+from cgcloud.core.box import fabric_task
+from cgcloud.core.package_manager_box import PackageManagerBox
+
+
+class SourceControlClient( PackageManagerBox ):
+ """
+ A box that uses source control software
+ """
+
+ @fabric_task
+ def setup_repo_host_keys(self, user=None):
+ #
+ # Pre-seed the host keys from bitbucket and github, such that ssh doesn't prompt during
+ # the initial checkouts.
+ #
+ for host in [ 'bitbucket.org', 'github.com' ]:
+ command = 'ssh-keyscan -t rsa %s >> ~/.ssh/known_hosts' % host
+ if user is None:
+ run( command )
+ elif user == 'root':
+ sudo( command )
+ else:
+ sudo( command, user=user, sudo_args='-i' )
+
+ def _list_packages_to_install(self):
+ return super( SourceControlClient, self )._list_packages_to_install( ) + [
+ 'git',
+ 'subversion',
+ 'mercurial' ]
diff --git a/core/src/cgcloud/core/task.py b/core/src/cgcloud/core/task.py
new file mode 100644
index 0000000..d7da2c1
--- /dev/null
+++ b/core/src/cgcloud/core/task.py
@@ -0,0 +1,23 @@
+import fabric.tasks
+
+
+class Task( fabric.tasks.Task ):
+ """
+ A Fabric task for EC2 boxes. Use this as the base class for custom Fabric tasks to be run on
+ an EC2 box, as represented by an instance of Ec2Box. Pass instances of this class to
+ Ec2Box.execute(). Use this only if you intend to create a hierarchy of task classes.
+ Otherwise, it is much easier to write tasks as plain methods in a concrete subclass of
+ Ec2Box and pass those methods to Ec2Box.execute()
+
+ This class extends Fabric's Task by using the class name as the name of the task and
+ maintaining a link to the box instance this task is executed on.
+ """
+
+ def __init__(self, box):
+ """
+ Initializes this task for the given box.
+
+ :param box: the box
+ :type box: Box"""
+ super( Task, self ).__init__( name=self.__class__.__name__ )
+ self.box = box
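+
+
+# A minimal subclass might look like this (a sketch; 'EchoHostname' and its use of Fabric's run()
+# are illustrative and not part of this module):
+#
+#   from fabric.operations import run
+#
+#   class EchoHostname( Task ):
+#       def run( self ):
+#           return run( 'hostname' )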
diff --git a/core/src/cgcloud/core/test/__init__.py b/core/src/cgcloud/core/test/__init__.py
new file mode 100644
index 0000000..5720289
--- /dev/null
+++ b/core/src/cgcloud/core/test/__init__.py
@@ -0,0 +1,108 @@
+import os
+import sys
+from contextlib import contextmanager
+from itertools import ifilter
+from tempfile import mkstemp
+
+import subprocess32
+from bd2k.util.iterables import concat
+from boto.utils import logging
+
+from cgcloud.core.cli import main, CGCloud
+from cgcloud.lib.test import CgcloudTestCase
+
+log = logging.getLogger( __name__ )
+
+
+class CoreTestCase( CgcloudTestCase ):
+ @classmethod
+ def setUpClass( cls ):
+ CGCloud.setup_logging( )
+ CGCloud.silence_boto_and_paramiko( )
+ super( CoreTestCase, cls ).setUpClass( )
+
+ ssh_opts = ('-o', 'UserKnownHostsFile=/dev/null', '-o', 'StrictHostKeyChecking=no')
+
+ @classmethod
+ def ssh_opts_str( cls ):
+ return ' '.join( cls.ssh_opts )
+
+ def _assert_remote_failure( self, role ):
+ """
+ Prove that failed remote commands lead to test failures
+ """
+ self._ssh( role, 'true' )
+ try:
+ self._ssh( role, 'false' )
+ self.fail( )
+ except SystemExit as e:
+ self.assertEqual( e.code, 1 )
+
+ @classmethod
+ def _ssh( cls, role, *args, **kwargs ):
+ cls._cgcloud( *concat( 'ssh', dict_to_opts( kwargs ), role, cls.ssh_opts, args ) )
+
+ @classmethod
+ def _rsync( cls, role, *args, **kwargs ):
+ cls._cgcloud( *concat( 'rsync',
+ dict_to_opts( kwargs, ssh_opts=cls.ssh_opts_str( ) ),
+ role, args ) )
+
+ def _send_file( self, role, content, name ):
+ script, script_path = mkstemp( )
+ try:
+ os.write( script, content )
+ except:
+ os.close( script )
+ raise
+ else:
+ os.close( script )
+ self._rsync( role, script_path, ':' + name )
+ finally:
+ os.unlink( script_path )
+
+ @classmethod
+ def _cgcloud( cls, *args ):
+ log.info( 'Running %r', args )
+ if os.environ.get( 'CGCLOUD_TEST_EXEC', "" ):
+ subprocess32.check_call( concat( 'cgcloud', args ) )
+ else:
+ main( args )
+
+
+@contextmanager
+def out_stderr( ):
+ with open( os.devnull, 'a' ) as f:
+ f, sys.stderr = sys.stderr, f
+ try:
+ yield
+ finally:
+ f, sys.stderr = sys.stderr, f
+
+
+def dict_to_opts( d=None, **kwargs ):
+ """
+ >>> list( dict_to_opts( dict( foo=True ) ) )
+ ['--foo']
+ >>> list( dict_to_opts( dict( foo=False) ) )
+ []
+ >>> list( dict_to_opts( foo=True ) )
+ ['--foo']
+ >>> list( dict_to_opts( dict( foo_bar=1 ), x=3 ) )
+ ['--foo-bar=1', '-x=3']
+ """
+ if d is None:
+ d = kwargs
+ elif kwargs:
+ d = dict( d, **kwargs )
+
+ def to_opt( k, v ):
+ s = '--' + k.replace( '_', '-' ) if len( k ) > 1 else '-' + k
+ if v is True:
+ return s
+ elif v is False:
+ return None
+ else:
+ return s + '=' + str( v )
+
+ return ifilter( None, (to_opt( k, v ) for k, v in d.iteritems( )) )
diff --git a/core/src/cgcloud/core/test/conftest.py b/core/src/cgcloud/core/test/conftest.py
new file mode 100644
index 0000000..7aa472a
--- /dev/null
+++ b/core/src/cgcloud/core/test/conftest.py
@@ -0,0 +1,14 @@
+def pytest_configure( config ):
+ # One of PyTest's nanny features is to redirect stdin to a thing that refuses to be read
+ # from. It is supposed to prevent tests from accidentally getting blocked waiting for user
+ # input. I have never in my life had a test that blocked on stdin without it being completely
+ # obvious, even without this nanny redirect. However, I've repeatedly run into issues where
+ # this redirection gets in the way, mainly with Fabric:
+ #
+ # http://jenkins.cgcloud.info/job/cgcloud/304/testReport/junit/src.cgcloud.core.test.test_core/CoreTests/test_generic_fedora_22_box/
+ #
+ # This workaround disables that nanny feature.
+ capman = config.pluginmanager.get_plugin( 'capturemanager' )
+ if capman._capturing.in_ is not None:
+ capman._capturing.in_.done( )
+ capman._capturing.in_ = None
diff --git a/core/src/cgcloud/core/test/test_core.py b/core/src/cgcloud/core/test/test_core.py
new file mode 100644
index 0000000..17da377
--- /dev/null
+++ b/core/src/cgcloud/core/test/test_core.py
@@ -0,0 +1,66 @@
+import logging
+import os
+
+from bd2k.util.exceptions import panic
+
+from cgcloud.core import roles
+from cgcloud.core.test import CoreTestCase, out_stderr
+
+log = logging.getLogger( __name__ )
+
+
+class CoreTests( CoreTestCase ):
+ """
+ Tests the typical life-cycle of instances and images
+ """
+ _multiprocess_shared_ = True
+
+ roles = roles( )
+
+ def _test( self, box_cls ):
+ role = box_cls.role( )
+ self._cgcloud( 'create', role )
+ try:
+ self._cgcloud( 'stop', role )
+ self._cgcloud( 'image', role )
+ try:
+ self._cgcloud( 'terminate', role )
+ self._cgcloud( 'recreate', role )
+ file_name = 'foo-' + role
+ self._ssh( role, 'touch', file_name )
+ self._rsync( role, ':' + file_name, '.' )
+ self.assertTrue( os.path.exists( file_name ) )
+ os.unlink( file_name )
+ self._cgcloud( 'terminate', role )
+ finally:
+ self._cgcloud( 'delete-image', role )
+ except:
+ with panic( log ):
+ self._cgcloud( 'terminate', '--quick', role )
+
+ @classmethod
+ def make_tests( cls ):
+ for box_cls in cls.roles:
+ test_method = (lambda _box_cls: lambda self: cls._test( self, _box_cls ))( box_cls )
+ test_method.__name__ = 'test_%s' % box_cls.role( ).replace( '-', '_' )
+ setattr( cls, test_method.__name__, test_method )
+
+ def test_illegal_argument( self ):
+ # Capture sys.stderr so we don't pollute the log of a successful run with an error message
+ with out_stderr( ):
+ self.assertRaises( SystemExit,
+ self._cgcloud, 'delete-image', self.roles[ 0 ].role( ), '-1' )
+
+ def test_pytest_capture_workaround( self ):
+ # To see this test fail, comment out the workaround in conftest.py and run this test from
+ # the command line. Note that when running the test from PyCharm you will not be able to
+ # see it fail because PyCharm's runner is hard-wired to disable PyTest's capture.
+ from fabric.operations import run
+ from fabric.context_managers import settings
+ with settings( host_string='localhost' ):
+ # We need a command that doesn't exit immediately such that the loop body in Fabric's
+ # input_loop() is actually run at least once.
+ run( 'sleep 1' )
+
+
+CoreTests.make_tests( )
diff --git a/core/src/cgcloud/core/ubuntu_box.py b/core/src/cgcloud/core/ubuntu_box.py
new file mode 100644
index 0000000..88f60d2
--- /dev/null
+++ b/core/src/cgcloud/core/ubuntu_box.py
@@ -0,0 +1,164 @@
+from abc import abstractmethod
+from ast import literal_eval
+from collections import namedtuple
+import contextlib
+import csv
+import logging
+import urllib2
+from StringIO import StringIO
+
+from fabric.operations import sudo, put, run
+
+from cgcloud.core.box import fabric_task
+from cgcloud.core.init_box import UpstartBox, SystemdBox
+from cgcloud.core.agent_box import AgentBox
+from cgcloud.core.cloud_init_box import CloudInitBox
+from cgcloud.core.package_manager_box import PackageManagerBox
+from cgcloud.core.rc_local_box import RcLocalBox
+from cgcloud.fabric.operations import remote_sudo_popen
+from cgcloud.lib.util import heredoc
+
+BASE_URL = 'http://cloud-images.ubuntu.com'
+
+log = logging.getLogger( __name__ )
+
+
+class UbuntuBox( AgentBox, CloudInitBox, RcLocalBox ):
+ """
+ A box representing EC2 instances that boot from one of Ubuntu's cloud-image AMIs
+ """
+
+ Release = namedtuple( 'Release', ('codename', 'version') )
+
+ @abstractmethod
+ def release( self ):
+ """
+ :return: the code name of the Ubuntu release
+ :rtype: UbuntuBox.Release
+ """
+ raise NotImplementedError( )
+
+ def _get_debconf_selections( self ):
+ """
+ Override in a concrete subclass to add custom debconf selections.
+
+ :return: A list of lines to be piped to debconf-set-selections (no newline at the end)
+ :rtype: list[str]
+ """
+ return [ ]
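+
+ # For a concrete override, see UbuntuGenericJenkinsSlave in the cgcloud-jenkins plugin, which
+ # returns lines like "postfix postfix/main_mailer_type string 'No configuration'".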
+
+ def admin_account( self ):
+ return 'ubuntu'
+
+ class TemplateDict( dict ):
+ def matches( self, other ):
+ return all( v == other.get( k ) for k, v in self.iteritems( ) )
+
+ def _base_image( self, virtualization_type ):
+ release = self.release( ).codename
+ template = self.TemplateDict( release=release, purpose='server', release_type='release',
+ storage_type='ebs', arch='amd64', region=self.ctx.region,
+ hypervisor=virtualization_type )
+ url = '%s/query/%s/server/released.current.txt' % (BASE_URL, release)
+ matches = [ ]
+ with contextlib.closing( urllib2.urlopen( url ) ) as stream:
+ images = csv.DictReader( stream,
+ fieldnames=[
+ 'release', 'purpose', 'release_type', 'release_date',
+ 'storage_type', 'arch', 'region', 'ami_id', 'aki_id',
+ 'dont_know', 'hypervisor' ],
+ delimiter='\t' )
+ for image in images:
+ if template.matches( image ):
+ matches.append( image )
+ if len( matches ) < 1:
+ raise self.NoSuchImageException(
+ "Can't find Ubuntu AMI for release %s and virtualization type %s" % (
+ release, virtualization_type) )
+ if len( matches ) > 1:
+ raise RuntimeError( 'More than one matching image: %s' % matches )
+ image_info = matches[ 0 ]
+ image_id = image_info[ 'ami_id' ]
+ return self.ctx.ec2.get_image( image_id )
+
+ apt_get = 'DEBIAN_FRONTEND=readline apt-get -q -y'
+
+ @fabric_task
+ def _sync_package_repos( self ):
+ put( remote_path='/etc/apt/apt.conf.d/99timeout',
+ use_sudo=True,
+ local_path=StringIO( heredoc( """
+ Acquire::http::Timeout "10";
+ Acquire::ftp::Timeout "10"; """ ) ), )
+ for i in range( 5 ):
+ cmd = self.apt_get + ' update'
+ result = sudo( cmd, warn_only=True )
+ if result.succeeded: return
+ # https://bugs.launchpad.net/ubuntu/+source/apt/+bug/972077
+ # https://lists.debian.org/debian-dak/2012/05/threads.html#00006
+ if 'Hash Sum mismatch' in result:
+ log.warn( "Detected race condition during in '%s'" )
+ else:
+ raise RuntimeError( "Command '%s' failed" % cmd )
+ raise RuntimeError( "Command '%s' repeatedly failed with race condition. Giving up." )
+
+ @fabric_task
+ def _upgrade_installed_packages( self ):
+ sudo( '%s upgrade' % self.apt_get )
+
+ @fabric_task
+ def _install_packages( self, packages ):
+ packages = " ".join( packages )
+ sudo( '%s --no-install-recommends install %s' % (self.apt_get, packages) )
+
+ def _get_package_installation_command( self, package ):
+ return [ 'apt-get', 'install', '-y', '--no-install-recommends', '--force-yes' ] + list(
+ self._substitute_package( package ) )
+
+ def _pre_install_packages( self ):
+ super( UbuntuBox, self )._pre_install_packages( )
+ debconf_selections = self._get_debconf_selections( )
+ if debconf_selections:
+ self.__debconf_set_selections( debconf_selections )
+
+ @fabric_task
+ def __debconf_set_selections( self, debconf_selections ):
+ with remote_sudo_popen( 'debconf-set-selections' ) as f:
+ f.write( '\n'.join( debconf_selections ) )
+
+ def _ssh_service_name( self ):
+ return 'ssh'
+
+
+class UpstartUbuntuBox( UbuntuBox, UpstartBox ):
+ pass
+
+
+class SystemdUbuntuBox( UbuntuBox, SystemdBox ):
+ pass
+
+
+class Python27UpdateUbuntuBox( UbuntuBox ):
+ """
+ A mixin for retrieving 2.7.x updates of Python from Felix Krull's deadsnakes PPA (not the one
+ with old and new Pythons, but the one dedicated to 2.7 point updates).
+
+ https://launchpad.net/~fkrull/+archive/ubuntu/deadsnakes-python2.7
+ """
+
+ @fabric_task
+ def _setup_package_repos( self ):
+ super( Python27UpdateUbuntuBox, self )._setup_package_repos( )
+ sudo( 'add-apt-repository -y ppa:fkrull/deadsnakes-python2.7' )
+
+ # FIXME: This should go some place else
+
+ @fabric_task
+ def _remote_python_version( self, python='python' ):
+ """
+ Returns a version tuple for the Python installed on the instance represented by this box
+
+ :rtype: tuple
+ """
+ return literal_eval( run( python + " -c 'import sys; print tuple(sys.version_info)'" ) )
+
diff --git a/core/src/cgcloud/core/version.py b/core/src/cgcloud/core/version.py
new file mode 120000
index 0000000..78fdfa5
--- /dev/null
+++ b/core/src/cgcloud/core/version.py
@@ -0,0 +1 @@
+../../../version.py
\ No newline at end of file
diff --git a/core/src/cgcloud/core/yum_box.py b/core/src/cgcloud/core/yum_box.py
new file mode 100644
index 0000000..2801a71
--- /dev/null
+++ b/core/src/cgcloud/core/yum_box.py
@@ -0,0 +1,78 @@
+import os.path
+from urlparse import urlparse
+
+from fabric.operations import sudo, run
+
+from cgcloud.core.init_box import SysvInitdBox
+from cgcloud.core.box import fabric_task
+from cgcloud.core.package_manager_box import PackageManagerBox
+
+
+class YumBox( PackageManagerBox, SysvInitdBox ):
+ """
+ A box that uses redhat's yum package manager
+ """
+
+ def _sync_package_repos( self ):
+ return False
+
+ @fabric_task
+ def _install_packages( self, packages ):
+ """
+ yum's error handling is a bit odd: If you pass two packages to install and one fails
+ while the other succeeds, yum exits with 0. To work around this, we need to invoke rpm to
+ check for successful installation separately for every package. Also, beware that some
+ older yums exit with 0 even if the package doesn't exist:
+
+ $ sudo yum install jasdgjhsadgajshd && echo yes
+ yes
+
+ :param packages: a list of package names
+ """
+ sudo( 'yum install -d 1 -y %s' % ' '.join( "'%s'" % package for package in packages ) )
+ # make sure it is really installed
+ for package in packages:
+ run( 'rpm -q %s' % package )
+
+ def _get_package_installation_command( self, package ):
+ return [ 'yum', 'install', '-d', '1', '-y' ] + list( self._substitute_package( package ) )
+
+ @fabric_task
+ def _upgrade_installed_packages( self ):
+ sudo( 'yum update -y -d 1' )
+
+ @fabric_task
+ def _yum_remove( self, package ):
+ sudo( "yum -d 1 -y remove '%s'" % package )
+
+ @fabric_task
+ def _yum_local( self, is_update, rpm_urls ):
+ """
+ Download the RPMs at the given URLs and run 'yum localupdate' or 'yum localinstall' on them.
+
+ :param rpm_urls: A list of HTTP or FTP URLs ending in a valid RPM file name.
+ """
+ rpms = [ ]
+ for rpm_url in rpm_urls:
+ run( "wget '%s'" % rpm_url )
+ rpm = os.path.basename( urlparse( rpm_url ).path )
+ rpms.append( rpm )
+
+ sudo( "yum -d 1 -y local{command} {rpms} --nogpgcheck".format(
+ command='update' if is_update else 'install',
+ rpms=' '.join( "'%s'" % rpm for rpm in rpms ) ) )
+
+ for rpm in rpms:
+ # extract package name from RPM, then check if package is actually installed
+ # since we can't rely on yum to report errors
+ run( "rpm -q $(rpm -qp --queryformat '%%{N}' '%s')" % rpm )
+ run( "rm '%s'" % rpm )
+
+ def _get_package_substitutions( self ):
+ return super( YumBox, self )._get_package_substitutions( ) + [
+ ( 'python-dev', 'python-devel' ),
+ ]
+
+ def _ssh_service_name( self ):
+ return 'sshd'
+
diff --git a/core/src/cgcloud/fabric/__init__.py b/core/src/cgcloud/fabric/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/core/src/cgcloud/fabric/operations.py b/core/src/cgcloud/fabric/operations.py
new file mode 100644
index 0000000..29696dc
--- /dev/null
+++ b/core/src/cgcloud/fabric/operations.py
@@ -0,0 +1,233 @@
+import os
+import sys
+import time
+from StringIO import StringIO
+from contextlib import contextmanager
+from fcntl import fcntl, F_GETFL, F_SETFL
+from pipes import quote
+from threading import Thread
+
+from bd2k.util.expando import Expando
+from bd2k.util.iterables import concat
+from bd2k.util.strings import interpolate as fmt
+from fabric.operations import sudo as real_sudo, get, put, run
+from fabric.state import env
+import fabric.io
+import fabric.operations
+
+
+def sudo( command, sudo_args=None, **kwargs ):
+ """
+ Work around https://github.com/fabric/fabric/issues/503
+ """
+ if sudo_args is not None:
+ old_prefix = env.sudo_prefix
+ env.sudo_prefix = '%s %s' % (old_prefix, sudo_args)
+ try:
+ return real_sudo( command, **kwargs )
+ finally:
+ if sudo_args is not None:
+ env.sudo_prefix = old_prefix
+
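+# Usage sketch: sudo( command, user=user, sudo_args='-i' ) runs the command through a login shell
+# of the target user, as done by SourceControlClient.setup_repo_host_keys().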
+
+def runv( *args, **kwargs ):
+ run( command=join_argv( args ), **kwargs )
+
+
+def sudov( *args, **kwargs ):
+ sudo( command=join_argv( args ), **kwargs )
+
+
+def pip( args, path='pip', use_sudo=False ):
+ """
+ Run pip.
+
+ :param args: a string or sequence of strings to be passed to pip as command line arguments.
+ If given a sequence of strings, its elements will be quoted if necessary and joined with a
+ single space in between.
+
+ :param path: the path to pip
+
+ :param use_sudo: whether to run pip as sudo
+ """
+ if isinstance( args, (str, unicode) ):
+ command = path + ' ' + args
+ else:
+ command = join_argv( concat( path, args ) )
+ # Disable pseudo terminal creation to prevent pip from spamming output with progress bar.
+ kwargs = Expando( pty=False )
+ if use_sudo:
+ f = sudo
+ # Set HOME so pip's cache doesn't go into real user's home, potentially creating files
+ # not owned by that user (older versions of pip) or printing a warning about caching
+ # being disabled.
+ kwargs.sudo_args = '-H'
+ else:
+ f = run
+ f( command, **kwargs )
+
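+# Usage sketch, mirroring the calls made by virtualenv() below (the '/opt/foo' path is assumed):
+#   pip( path='/opt/foo/bin/pip', args=[ 'install', '--upgrade', 'pip' ] )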
+
+def join_argv( command ):
+ return ' '.join( map( quote, command ) )
+
+
+def virtualenv( name, distributions=None, pip_distribution='pip', executable=None ):
+ """
+ Installs a set of distributions (aka PyPI packages) into a virtualenv under /opt and
+ optionally links an executable from that virtualenv into /usr/local/bin.
+
+ :param name: the name of the directory under /opt that will hold the virtualenv
+
+ :param distributions: a list of distributions to be installed into the virtualenv. Defaults
+ to [ name ]. You can also list other "pip install" options, like --pre.
+
+ :param pip_distribution: if non-empty, the distribution and optional version spec to upgrade
+ pip to. Defaults to the latest version of pip. Set to empty string to prevent pip from being
+ upgraded. Downgrades from the system-wide pip version currently don't work.
+
+ :param executable: The name of an executable in the virtualenv's bin directory that should be
+ symlinked into /usr/local/bin. The executable must be provided by the distributions that are
+ installed in the virtualenv.
+ """
+ # FIXME: consider --no-pip and easy_installing pip to support downgrades
+ if distributions is None:
+ distributions = [ name ]
+ venv = '/opt/' + name
+ admin = run( 'whoami' )
+ sudo( fmt( 'mkdir -p {venv}' ) )
+ sudo( fmt( 'chown {admin}:{admin} {venv}' ) )
+ try:
+ run( fmt( 'virtualenv {venv}' ) )
+ if pip_distribution:
+ pip( path=venv + '/bin/pip', args=[ 'install', '--upgrade', pip_distribution ] )
+ pip( path=venv + '/bin/pip', args=concat( 'install', distributions ) )
+ finally:
+ sudo( fmt( 'chown -R root:root {venv}' ) )
+ if executable:
+ sudo( fmt( 'ln -snf {venv}/bin/{executable} /usr/local/bin/' ) )
+
+
+@contextmanager
+def remote_open( remote_path, use_sudo=False ):
+ """
+ Equivalent of open( remote_path, "a+" ) as if run on the remote system
+ """
+ buf = StringIO( )
+ get( remote_path=remote_path, local_path=buf )
+ yield buf
+ buf.seek( 0 )
+ put( local_path=buf, remote_path=remote_path, use_sudo=use_sudo )
+
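+# Usage sketch (the path is an assumed example): append a line to a remote file in place:
+#
+#   with remote_open( '/etc/environment', use_sudo=True ) as f:
+#       f.write( 'FOO="bar"\n' )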
+
+# noinspection PyPep8Naming
+class remote_popen( object ):
+ """
+ A context manager that yields a file handle. Anything written to that handle is fed to the
+ standard input of the given command on the remote host. The command's output is available as
+ the handle's 'result' attribute after the context exits.
+
+ >>> from fabric.context_managers import hide, settings
+ >>> with settings(host_string='localhost'):
+ ... with hide( 'output' ):
+ ... # Disable shell since it may print additional stuff to console
+ ... with remote_popen( 'sort -n', shell=False ) as f:
+ ... f.write( '\\n'.join( map( str, [ 3, 2, 1] ) ) )
+ [localhost] run: sort -n
+ 3
+ 2
+ 1
+
+ Above is the echoed input, below the sorted output.
+
+ >>> print f.result
+ 1
+ 2
+ 3
+ """
+
+ def __init__( self, *args, **kwargs ):
+ try:
+ if kwargs[ 'pty' ]:
+ raise RuntimeError( "The 'pty' keyword argument must be omitted or set to False" )
+ except KeyError:
+ kwargs[ 'pty' ] = False
+ self.args = args
+ self.kwargs = kwargs
+ # FIXME: Eliminate this buffer and have caller write directly into the pipe
+ self.stdin = StringIO( )
+ self.stdin.result = None
+
+ def __enter__( self ):
+ return self.stdin
+
+ def __exit__( self, exc_type, exc_val, exc_tb ):
+ if exc_type is None:
+ _r, _w = os.pipe( )
+
+ def copy( ):
+ with os.fdopen( _w, 'w' ) as w:
+ w.write( self.stdin.getvalue( ) )
+
+ t = Thread( target=copy )
+ t.start( )
+ try:
+ _stdin = sys.stdin.fileno( )
+ _old_stdin = os.dup( _stdin )
+ os.close( _stdin )
+ assert _stdin == os.dup( _r )
+ # monkey-patch Fabric
+ _input_loop = fabric.operations.input_loop
+ fabric.operations.input_loop = input_loop
+ try:
+ self.stdin.result = self._run( )
+ finally:
+ fabric.operations.input_loop = _input_loop
+ os.close( _stdin )
+ os.dup( _old_stdin )
+ finally:
+ t.join( )
+ return False
+
+ def _run( self ):
+ return run( *self.args, **self.kwargs )
+
+
+# noinspection PyPep8Naming
+class remote_sudo_popen( remote_popen ):
+ def _run( self ):
+ return sudo( *self.args, **self.kwargs )
+
+
+# Version of Fabric's input_loop that handles EOF on stdin and reads more greedily with
+# non-blocking mode.
+
+# TODO: We should open a ticket for this.
+
+from select import select
+from fabric.network import ssh
+
+
+def input_loop( chan, using_pty ):
+ opts = fcntl( sys.stdin.fileno( ), F_GETFL )
+ fcntl( sys.stdin.fileno( ), F_SETFL, opts | os.O_NONBLOCK )
+ try:
+ while not chan.exit_status_ready( ):
+ r, w, x = select( [ sys.stdin ], [ ], [ ], 0.0 )
+ have_char = (r and r[ 0 ] == sys.stdin)
+ if have_char and chan.input_enabled:
+ # Send all local stdin to remote end's stdin
+ bytes = sys.stdin.read( )
+ if bytes is None:
+ pass
+ elif not bytes:
+ chan.shutdown_write( )
+ break
+ else:
+ chan.sendall( bytes )
+ # Optionally echo locally, if needed.
+ if not using_pty and env.echo_stdin:
+ # Not using fastprint() here -- it prints as 'user'
+ # output level, don't want it to be accidentally hidden
+ sys.stdout.write( bytes )
+ sys.stdout.flush( )
+ time.sleep( ssh.io_sleep )
+ finally:
+ fcntl( sys.stdin.fileno( ), F_SETFL, opts )
diff --git a/core/tests.py b/core/tests.py
new file mode 100644
index 0000000..d314154
--- /dev/null
+++ b/core/tests.py
@@ -0,0 +1,7 @@
+parallelizable_keywords = [
+ 'test_generic_centos_6_box',
+ 'test_generic_fedora_21_box',
+ 'test_generic_fedora_22_box',
+ 'test_generic_ubuntu_precise_box',
+ 'test_generic_ubuntu_trusty_box',
+ 'test_generic_ubuntu_vivid_box' ]
diff --git a/jenkins.sh b/jenkins.sh
new file mode 100644
index 0000000..b5cbfe7
--- /dev/null
+++ b/jenkins.sh
@@ -0,0 +1,20 @@
+virtualenv venv
+. venv/bin/activate
+
+pip install pytest==2.9.2
+
+export CGCLOUD_ME=jenkins@jenkins-master
+
+make develop sdist
+
+# We want to use -k/--keep-going such that make doesn't fail the build on the first subproject for
+# which the tests fail and keeps testing the other projects. Unfortunately, that takes away the
+# convenience of specifying multiple targets in one make invocation since make would not stop on a
+# failing target.
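+#
+# Note: make_targets is expected to be provided by the environment, e.g. the Jenkins job
+# configuration might set make_targets="develop sdist test" (an assumed example).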
+( for target in $make_targets; do
+ ( if [ "$target" = test ]; then
+ make --jobs --keep-going $target
+ else
+ make $target
+ fi ) || exit
+done )
diff --git a/jenkins/.gitignore b/jenkins/.gitignore
new file mode 100644
index 0000000..0ff20ef
--- /dev/null
+++ b/jenkins/.gitignore
@@ -0,0 +1,6 @@
+/build
+/dist
+*.egg-info
+*.pyc
+/MANIFEST.in
+/version.py
diff --git a/jenkins/README.rst b/jenkins/README.rst
new file mode 100644
index 0000000..e8d6670
--- /dev/null
+++ b/jenkins/README.rst
@@ -0,0 +1,190 @@
+The CGCloud Jenkins project contains the roles for running a distributed
+continuous integration environment in EC2 with one Jenkins master VM and
+multiple slave VMs. A Jenkins slave is a machine that the master delegates
+builds to. Slaves are launched on demand and are shutdown after a certain
+amount of idle time. The different slave roles are blueprints for setting up a
+slave VM that has the necessary prerequisites for running a particular Jenkins
+build.
+
+
+Quickstart
+==========
+
+Activate the virtualenv cgcloud was installed in and install
+``cgcloud-jenkins``::
+
+ cd
+ virtualenv cgcloud
+ source cgcloud/bin/activate
+ pip install cgcloud-jenkins
+ export CGCLOUD_PLUGINS="cgcloud.jenkins:$CGCLOUD_PLUGINS"
+
+If you get ``DistributionNotFound: No distributions matching the version for
+cgcloud-jenkins``, try running ``pip install --pre cgcloud-jenkins``.
+
+Running ``cgcloud list-roles`` should now list the additional roles defined in
+the plugin::
+
+ ...
+ jenkins-master
+ ubuntu-lucid-genetorrent-jenkins-slave
+ ubuntu-precise-genetorrent-jenkins-slave
+ ubuntu-saucy-genetorrent-jenkins-slave
+ ubuntu-trusty-genetorrent-jenkins-slave
+ centos5-genetorrent-jenkins-slave
+ centos6-genetorrent-jenkins-slave
+ fedora19-genetorrent-jenkins-slave
+ fedora20-genetorrent-jenkins-slave
+ ubuntu-lucid-generic-jenkins-slave
+ ubuntu-precise-generic-jenkins-slave
+ ubuntu-saucy-generic-jenkins-slave
+ ubuntu-trusty-generic-jenkins-slave
+ centos5-generic-jenkins-slave
+ centos6-generic-jenkins-slave
+ fedora19-generic-jenkins-slave
+ fedora20-generic-jenkins-slave
+ centos5-rpmbuild-jenkins-slave
+ centos6-rpmbuild-jenkins-slave
+ load-test-box
+ data-browser-jenkins-slave
+
+Master And Slave Roles
+======================
+
+The plugin defines a role for the master (``jenkins-master``) and various slave
+roles for building certain CGL projects. There are also a
+bunch of generic slaves that are not customized for a particular project.
+
+The master (``jenkins-master``) is a long-running box that hosts the Jenkins
+web application. The Jenkins installation (code and data) is cordoned off in
+the home directory of a separate ``jenkins`` user. That home directory actually
+resides on a secondary EBS volume whose life cycle is independent from that of
+the master box, i.e. VM instance. This allows us to update the OS of the master
+without having to set up Jenkins from scratch every time we do so.
+
+The remaining roles define the Jenkins slaves. A Jenkins slave is a
+short-running box with which the master establishes an SSH connection for the
+purpose of triggering a *remote build*. The CGCloud Jenkins plugin (this
+project) is used to create the VM images and register them with the master such
+that the master can launch a slave instance when needed to run a remote build
+on the platform provided by the slave.
+
+Jenkins
+=======
+
+Jenkins is a continuous integration server/web application running on the
+``jenkins-master``. Jenkins uses so called *projects* that define where to get
+the source, how to build and test the source and which build artifacts to
+archive. Builds can be run automatically whenever a push is made, on a fixed
+schedule or manually. Builds are executed by an agent. Agents can run locally
+on the Jenkins master or remotely on one or more slaves. Jenkins uses its own
+plugin system to extend and modify the default behavior. We use the EC2 plugin
+which allows us to create slaves on demand in EC2 from images created by
+cgcloud in conjunction with this project. Mind the distinction between CGCloud
+Jenkins, which plugs into CGCloud, and the hundreds of plugins that extend
+Jenkins.
+
+The Jenkins web UI can always be accessed by tunneling port 8080 through SSH.
+Running ``cgcloud ssh jenkins-master`` sets up the necessary port forwarding.
+Authorization and authentication in Jenkins itself is disabled on a fresh
+instance but can be enabled and further customized using Jenkins plugins. Note:
+Anyone with SSH access to the master can access Jenkins and do anything with it.
+
+Tutorial: Creating a Continuous Integration Environment
+=======================================================
+
+In this tutorial we'll create a continuous integration environment consisting
+of a Jenkins master and several slaves. The tutorial assumes that you completed
+the Quickstart section of the CGCloud README.
+
+Creating The Master
+-------------------
+
+Create the Jenkins master instance::
+
+ cgcloud create jenkins-master
+
+As a test, SSH into the master as the administrative user::
+
+ cgcloud ssh -a jenkins-master
+ exit
+
+The administrative user has ``sudo`` privileges. Its name varies from platform
+to platform but ``cgcloud`` keeps track of that for you. For yet another test,
+SSH into the master as the *jenkins* user::
+
+ cgcloud ssh jenkins-master
+
+This is the user that the Jenkins server runs as.
+
+Next, create an image of the master such that you can always recreate a 100%
+identical clone::
+
+ cgcloud stop jenkins-master
+ cgcloud image jenkins-master
+ cgcloud terminate jenkins-master
+ cgcloud recreate jenkins-master
+
+The first command is necessary to stop the master because only a stopped
+instance can be imaged. The ``image`` command creates the actual AMI image. The
+``terminate`` command disposes of the instance. This will delete the ``/``
+partition while leaving the ``/var/lib/jenkins`` partition around. The latter
+is stored on a separate EBS volume called ``jenkins-data``. In other words, the
+``terminate`` command leaves us with two things: 1) the AMI for a master box
+and 2) the Jenkins data volume. The ``recreate`` command then creates a new
+instance from the most recently created image *and* attaches the
+``jenkins-data`` volume to that instance.
+
+Creating The Slaves
+-------------------
+
+Open a new shell window and create the first slave::
+
+ cgcloud list-roles
+ cgcloud create docker-jenkins-slave
+
+SSH into it::
+
+ cgcloud ssh -a docker-jenkins-slave
+
+Notice that
+
+ * The admin user has sudo rights::
+
+ sudo whoami
+
+ * The builds directory in the Jenkins user's home is symbolically linked to
+ ephemeral storage::
+
+ sudo ls -l ~jenkins
+
+ * git and docker are installed::
+
+ git --version
+ docker --version
+ exit
+
+Now stop, image and terminate the box::
+
+ cgcloud stop docker-jenkins-slave
+ cgcloud image docker-jenkins-slave
+ cgcloud terminate docker-jenkins-slave
+
+Finally, register all slaves with the master::
+
+ cgcloud register-slaves jenkins-master docker-jenkins-slave
+
+The ``register-slaves`` command adds a section to Jenkins' config.xml that
+defines how to spawn an EC2 instance of ``docker-jenkins-slave`` from the AMI
+we just created. The slave description also associates the slave with the label
+``docker``. If a project definition requests to be run on slaves labelled
+``docker``, an instance will be created from the AMI. Once the instance is up,
+the Jenkins master will launch the agent on it via SSH. Finally, the master
+will ask the agent to run a build for that project. If a slave labelled
+``docker`` already exists, it will be used instead of creating a new one. You
+can customize how many concurrent builds run on each slave by increasing the
+number of agents running on a slave. By default only one slave per role will be
+launched, but you can configure Jenkins to launch more than one if the queue
+contains multiple builds for a given label.
diff --git a/jenkins/setup.cfg b/jenkins/setup.cfg
new file mode 100644
index 0000000..a7b1d9d
--- /dev/null
+++ b/jenkins/setup.cfg
@@ -0,0 +1,5 @@
+[pytest]
+# Look for any python file, the default of test_*.py wouldn't work for us
+python_files=*.py
+# Also run doctests
+addopts = --doctest-modules -k 'not create_all_slaves'
diff --git a/jenkins/setup.py b/jenkins/setup.py
new file mode 100644
index 0000000..0d3071e
--- /dev/null
+++ b/jenkins/setup.py
@@ -0,0 +1,20 @@
+from __future__ import absolute_import
+
+from setuptools import setup, find_packages
+
+from version import cgcloud_version, fabric_dep
+
+setup( name='cgcloud-jenkins',
+ version=cgcloud_version,
+
+ author="Hannes Schmidt",
+ author_email="hannes at ucsc.edu",
+ url='https://github.com/BD2KGenomics/cgcloud',
+ description='Setup and manage a Jenkins continuous integration cluster in EC2',
+
+ package_dir={ '': 'src' },
+ packages=find_packages( 'src' ),
+ namespace_packages=[ 'cgcloud' ],
+ install_requires=[ 'cgcloud-lib==' + cgcloud_version,
+ 'cgcloud-core==' + cgcloud_version,
+ fabric_dep ] )
diff --git a/jenkins/src/cgcloud/__init__.py b/jenkins/src/cgcloud/__init__.py
new file mode 100644
index 0000000..1148131
--- /dev/null
+++ b/jenkins/src/cgcloud/__init__.py
@@ -0,0 +1 @@
+__import__( 'pkg_resources' ).declare_namespace( __name__ )
\ No newline at end of file
diff --git a/jenkins/src/cgcloud/jenkins/__init__.py b/jenkins/src/cgcloud/jenkins/__init__.py
new file mode 100644
index 0000000..2473b3a
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/__init__.py
@@ -0,0 +1,21 @@
+def roles( ):
+ from cgcloud.jenkins.jenkins_master import JenkinsMaster
+ from cgcloud.jenkins.generic_jenkins_slaves import (UbuntuLucidGenericJenkinsSlave,
+ Centos5GenericJenkinsSlave,
+ Centos6GenericJenkinsSlave,
+ Fedora19GenericJenkinsSlave,
+ Fedora20GenericJenkinsSlave,
+ UbuntuPreciseGenericJenkinsSlave,
+ UbuntuTrustyGenericJenkinsSlave)
+ from cgcloud.jenkins.cgcloud_jenkins_slave import CgcloudJenkinsSlave
+ from cgcloud.jenkins.rpmbuild_jenkins_slaves import (Centos5RpmbuildJenkinsSlave,
+ Centos6RpmbuildJenkinsSlave)
+ from cgcloud.jenkins.s3am_jenkins_slave import S3amJenkinsSlave
+ from cgcloud.jenkins.toil_jenkins_slave import ToilJenkinsSlave
+ from cgcloud.jenkins.docker_jenkins_slave import DockerJenkinsSlave
+ return sorted( locals( ).values( ), key=lambda cls: cls.__name__ )
+
+
+def command_classes( ):
+ from cgcloud.jenkins.commands import RegisterSlaves
+ return sorted( locals( ).values( ), key=lambda cls: cls.__name__ )
diff --git a/jenkins/src/cgcloud/jenkins/cgcloud_jenkins_slave.py b/jenkins/src/cgcloud/jenkins/cgcloud_jenkins_slave.py
new file mode 100644
index 0000000..03daf60
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/cgcloud_jenkins_slave.py
@@ -0,0 +1,81 @@
+from cgcloud.core.common_iam_policies import ec2_full_policy
+from cgcloud.core.ubuntu_box import Python27UpdateUbuntuBox
+from cgcloud.lib import test_namespace_suffix_length
+from cgcloud.lib.util import abreviated_snake_case_class_name
+
+from cgcloud.jenkins.generic_jenkins_slaves import UbuntuTrustyGenericJenkinsSlave
+
+
+class CgcloudJenkinsSlave( UbuntuTrustyGenericJenkinsSlave, Python27UpdateUbuntuBox ):
+ """
+ Jenkins slave for running CGCloud's unit tests
+ """
+
+ @classmethod
+ def recommended_instance_type( cls ):
+ return "m3.xlarge"
+
+ def _list_packages_to_install( self ):
+ return super( CgcloudJenkinsSlave, self )._list_packages_to_install( ) + [
+ # for PyCrypto:
+ 'python-dev',
+ 'autoconf',
+ 'automake',
+ 'binutils',
+ 'gcc',
+ 'make',
+ 'libyaml-dev'
+ ]
+
+ def _get_iam_ec2_role( self ):
+ iam_role_name, policies = super( CgcloudJenkinsSlave, self )._get_iam_ec2_role( )
+ iam_role_name += '--' + abreviated_snake_case_class_name( CgcloudJenkinsSlave )
+ cgcloud_bucket_arn = "arn:aws:s3:::%s" % self.ctx.s3_bucket_name
+ policies.update( dict(
+ ec2_full=ec2_full_policy, # FIXME: Be more specific
+ iam_cgcloud_jenkins_slave_pass_role=dict( Version="2012-10-17", Statement=[
+ # This assumes that if instance lives in /, then tests running on the instance
+ # will run in /test-5571439d. If the instance lives in /foo, then tests running
+ # on the instance will run in /foo/test-5571439d.
+ dict( Effect="Allow", Resource=self._pass_role_arn(),
+ Action="iam:PassRole" ) ] ),
+ register_keypair=dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="arn:aws:s3:::*", Action="s3:ListAllMyBuckets" ),
+ dict( Effect="Allow",
+ Action="s3:*",
+ Resource=[ cgcloud_bucket_arn, cgcloud_bucket_arn + "/*" ] ),
+ dict( Effect="Allow",
+ Action=[ "sns:Publish", "sns:CreateTopic" ],
+ Resource='arn:aws:sns:*:%s:cgcloud-agent-notifications' % self.ctx.account ) ] ),
+ iam_cgcloud_jenkins_slave=dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow",
+ Resource="*",
+ Action=[ "iam:ListRoles",
+ "iam:CreateRole",
+ "iam:DeleteRole",
+ "iam:ListRolePolicies",
+ "iam:DeleteRolePolicy",
+ "iam:GetRolePolicy",
+ "iam:PutRolePolicy",
+ "iam:ListInstanceProfiles",
+ "iam:GetInstanceProfile",
+ "iam:CreateInstanceProfile",
+ "iam:DeleteInstanceProfile",
+ "iam:RemoveRoleFromInstanceProfile",
+ "iam:AddRoleToInstanceProfile",
+ "iam:DeleteInstanceProfile" ] ) ] ) ) )
+ return iam_role_name, policies
+
+ def _pass_role_arn( self ):
+ """
+ Return a pattern that a role name must match if it is to be passed to an instance created
+ by code running on this Jenkins slave.
+ """
+ # This is a bit convoluted, but it is still better than optionally allowing wildcards in
+ # the name validation in Context.absolute_name(). The ? wildcard is not very well
+ # documented but I found evidence for it here:
+ # http://docs.aws.amazon.com/IAM/latest/UserGuide/PolicyVariables.html#policy-vars-specialchars
+ pass_role_arn = self._role_arn( iam_role_name_prefix='test/testnamespacesuffixpattern/' )
+ pass_role_arn = pass_role_arn.replace( 'testnamespacesuffixpattern',
+ "?" * test_namespace_suffix_length )
+ return pass_role_arn
diff --git a/jenkins/src/cgcloud/jenkins/commands.py b/jenkins/src/cgcloud/jenkins/commands.py
new file mode 100644
index 0000000..71597de
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/commands.py
@@ -0,0 +1,40 @@
+from fnmatch import fnmatch
+import os
+
+from cgcloud.core.commands import InstanceCommand
+
+
+class RegisterSlaves( InstanceCommand ):
+ """
+ Adds the specified slave images to Jenkins' EC2 configuration on the given master to the
+ extent that the specified master can later spawn these slaves to run builds as needed.
+ """
+
+ def __init__( self, application, **kwargs ):
+ super( RegisterSlaves, self ).__init__( application, **kwargs )
+ self.option( '--slaves', '-s', metavar='ROLE_GLOB',
+ nargs='*', default=[ '*-jenkins-slave' ],
+ help='A list of role names or role name patterns (shell globs) of the '
+ 'slaves that should be added to the Jenkins config. For each matching '
+ 'slave, the most recently created image will be registered using the '
+ 'recommended instance type for that slave.' )
+ self.option( '--clean', '-C', default=False, action='store_true',
+ help='Clear the list of slaves in the master before registering new slaves. '
+ 'Beware that this option removes slaves that were registered through '
+ 'other means, e.g. via the web UI.' )
+ self.option( '--instance-type', '-t', metavar='TYPE',
+ default=os.environ.get( 'CGCLOUD_INSTANCE_TYPE', None ),
+ help='The type of EC2 instance to register the slave with, e.g. t1.micro, '
+ 'm1.small, m1.medium, or m1.large etc. The value of the environment '
+ 'variable CGCLOUD_INSTANCE_TYPE, if that variable is present, overrides '
+ 'the default, an instance type appropriate for the role.' )
+
+ def run_on_instance( self, options, master ):
+ master.register_slaves( [ slave_cls
+ for role, slave_cls in self.application.roles.iteritems( )
+ for role_glob in options.slaves
+ if fnmatch( role, role_glob ) ],
+ clean=options.clean,
+ instance_type=options.instance_type )
+
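+# Typical invocation, as shown in the README:
+#   cgcloud register-slaves jenkins-master docker-jenkins-slave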
+
diff --git a/jenkins/src/cgcloud/jenkins/docker_jenkins_slave.py b/jenkins/src/cgcloud/jenkins/docker_jenkins_slave.py
new file mode 100644
index 0000000..bd59bee
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/docker_jenkins_slave.py
@@ -0,0 +1,20 @@
+from cgcloud.core.ubuntu_box import Python27UpdateUbuntuBox
+from cgcloud.jenkins.generic_jenkins_slaves import UbuntuTrustyGenericJenkinsSlave
+from cgcloud.core.docker_box import DockerBox
+
+
+class DockerJenkinsSlave( UbuntuTrustyGenericJenkinsSlave, DockerBox, Python27UpdateUbuntuBox ):
+ """
+ A box for running the cgl-docker-lib builds on. Probably a bit of a misnomer but so far the
+ only cgl-docker-lib-specific requirement is the dependency on make.
+ """
+
+ def _list_packages_to_install( self ):
+ return super( DockerJenkinsSlave, self )._list_packages_to_install( ) + [ 'make' ]
+
+ def _docker_users( self ):
+ return super( DockerJenkinsSlave, self )._docker_users( ) + [ 'jenkins' ]
+
+ @classmethod
+ def recommended_instance_type( cls ):
+ return 'm3.large'
diff --git a/jenkins/src/cgcloud/jenkins/generic_jenkins_slaves.py b/jenkins/src/cgcloud/jenkins/generic_jenkins_slaves.py
new file mode 100644
index 0000000..0cd2b1b
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/generic_jenkins_slaves.py
@@ -0,0 +1,153 @@
+from cgcloud.core.generic_boxes import *
+from cgcloud.fabric.operations import sudo
+
+from cgcloud.jenkins.jenkins_slave import JenkinsSlave
+from cgcloud.core.ubuntu_box import UbuntuBox
+
+
+class GenericJenkinsSlave( JenkinsSlave ):
+ """
+ Generic Jenkins slave
+ """
+ pass
+
+
+class CentosGenericJenkinsSlave( CentosBox, GenericJenkinsSlave ):
+ """
+ Generic Jenkins slave for CentOS
+ """
+
+ def _list_packages_to_install( self ):
+ # TODO: List JRE explicitly (it is already installed on RightScale CentOS images)
+ return super( CentosGenericJenkinsSlave, self )._list_packages_to_install( ) + [ ]
+
+ @fabric_task
+ def _setup_build_user( self ):
+ super( CentosGenericJenkinsSlave, self )._setup_build_user( )
+ sudo( "echo 'Defaults:jenkins !requiretty' >> /etc/sudoers" )
+ sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /bin/rpm' >> /etc/sudoers" )
+ sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /usr/bin/yum' >> /etc/sudoers" )
+
+ @fabric_task
+ def _post_install_packages( self ):
+ super( CentosGenericJenkinsSlave, self )._post_install_packages( )
+ # FIXME: These are public but we should rebuild them and host them within our control
+ self._yum_local( is_update=False, rpm_urls=[
+ 'http://public-artifacts.cghub.ucsc.edu.s3.amazonaws.com/custom-centos-packages/python27-2.7.2-cghub.x86_64.rpm',
+ 'http://public-artifacts.cghub.ucsc.edu.s3.amazonaws.com/custom-centos-packages/python27-devel-2.7.2-cghub.x86_64.rpm',
+ 'http://public-artifacts.cghub.ucsc.edu.s3.amazonaws.com/custom-centos-packages/python27-setuptools-0.6c11-cghub.noarch.rpm'
+ ] )
+
+
+class Centos5GenericJenkinsSlave( CentosGenericJenkinsSlave, GenericCentos5Box ):
+ """
+ Generic Jenkins slave for CentOS 5
+ """
+ pass
+
+
+class Centos6GenericJenkinsSlave( CentosGenericJenkinsSlave, GenericCentos6Box ):
+ """
+ Generic Jenkins slave for CentOS 6
+ """
+ pass
+
+
+class UbuntuGenericJenkinsSlave( UbuntuBox, GenericJenkinsSlave ):
+ """
+ Generic Jenkins slave for Ubuntu
+ """
+
+ def _list_packages_to_install( self ):
+ return super( UbuntuGenericJenkinsSlave, self )._list_packages_to_install( ) + [
+ 'openjdk-7-jre-headless',
+ 'gdebi-core' ] # comes in handy when installing .deb's with dependencies
+
+ @fabric_task
+ def _setup_build_user( self ):
+ super( UbuntuGenericJenkinsSlave, self )._setup_build_user( )
+ sudo( "echo 'Defaults:jenkins !requiretty' >> /etc/sudoers" )
+ for prog in ('apt-get', 'dpkg', 'gdebi'):
+ sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /usr/bin/%s' >> /etc/sudoers" % prog )
+
+ def _get_debconf_selections( self ):
+ # On Lucid, somehow postfix gets pulled in as a dependency kicking the frontend into
+ # interactive mode. The same happens when installing GridEngine.
+ return super( UbuntuGenericJenkinsSlave, self )._get_debconf_selections( ) + [
+ "postfix postfix/main_mailer_type string 'No configuration'",
+ "postfix postfix/mailname string %s" % self.host_name
+ ]
+
+
+class UbuntuLucidGenericJenkinsSlave( UbuntuGenericJenkinsSlave, GenericUbuntuLucidBox ):
+ """
+ Generic Jenkins slave for Ubuntu 10.04 LTS (EOL April 2015)
+ """
+
+ def _setup_package_repos( self ):
+ super( UbuntuLucidGenericJenkinsSlave, self )._setup_package_repos( )
+ self.__add_git_ppa( )
+ self.__add_python_ppa( )
+
+ @fabric_task
+ def __add_git_ppa( self ):
+ sudo( 'add-apt-repository -y ppa:git-core/ppa' )
+
+ @fabric_task
+ def __add_python_ppa( self ):
+ sudo( 'apt-add-repository -y ppa:fkrull/deadsnakes/ubuntu' )
+
+ def _list_packages_to_install( self ):
+ return super( UbuntuLucidGenericJenkinsSlave, self )._list_packages_to_install( ) + [
+ 'python2.7',
+ 'python2.7-dev'
+ ]
+
+ def _get_package_substitutions( self ):
+ return super( UbuntuLucidGenericJenkinsSlave, self )._get_package_substitutions( ) + [
+ ('openjdk-7-jre-headless', 'openjdk-6-jre') ]
+
+
+class UbuntuPreciseGenericJenkinsSlave( UbuntuGenericJenkinsSlave, GenericUbuntuPreciseBox ):
+ """
+ Generic Jenkins slave for Ubuntu 12.04 LTS (EOL April 2017)
+ """
+ pass
+
+
+class UbuntuTrustyGenericJenkinsSlave( UbuntuGenericJenkinsSlave, GenericUbuntuTrustyBox ):
+ """
+ Generic Jenkins slave for Ubuntu 14.04 LTS (EOL April 2019)
+ """
+ pass
+
+
+class FedoraGenericJenkinsSlave( FedoraBox, GenericJenkinsSlave ):
+ """
+ Generic Jenkins slave for Fedora
+ """
+
+ def _list_packages_to_install( self ):
+ return super( FedoraGenericJenkinsSlave, self )._list_packages_to_install( ) + [
+ 'java-1.7.0-openjdk' ]
+
+ @fabric_task
+ def _setup_build_user( self ):
+ super( FedoraGenericJenkinsSlave, self )._setup_build_user( )
+ sudo( "echo 'Defaults:jenkins !requiretty' >> /etc/sudoers" )
+ sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /bin/rpm' >> /etc/sudoers" )
+ sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /usr/bin/yum' >> /etc/sudoers" )
+
+
+class Fedora19GenericJenkinsSlave( FedoraGenericJenkinsSlave, GenericFedora19Box ):
+ """
+ Generic Jenkins slave for Fedora 19
+ """
+ pass
+
+
+class Fedora20GenericJenkinsSlave( FedoraGenericJenkinsSlave, GenericFedora20Box ):
+ """
+ Generic Jenkins slave for Fedora 20
+ """
+ pass
diff --git a/jenkins/src/cgcloud/jenkins/jenkins_master.py b/jenkins/src/cgcloud/jenkins/jenkins_master.py
new file mode 100644
index 0000000..27cc375
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/jenkins_master.py
@@ -0,0 +1,330 @@
+from StringIO import StringIO
+from contextlib import contextmanager
+import logging
+from textwrap import dedent
+from xml.etree import ElementTree
+
+from fabric.context_managers import hide
+
+from fabric.operations import run, sudo, put, get
+
+from cgcloud.lib.ec2 import EC2VolumeHelper
+from cgcloud.lib.util import UserError, abreviated_snake_case_class_name
+from cgcloud.core.box import fabric_task
+from cgcloud.core.generic_boxes import GenericUbuntuTrustyBox
+from cgcloud.core.source_control_client import SourceControlClient
+
+log = logging.getLogger( __name__ )
+
+
+# FIXME: __create_jenkins_keypair and __inject_aws_credentials fail when the Jenkins volume is fresh
+# since certain files like config.xml don't exist (because Jenkins hasn't written them out yet or
+# because the plugin isn't installed yet). The workaround is to install all stop the instance (
+
+# FIXME: __create_jenkins_keypair still uses the old configuration section to inject the private
+# key into Jenkins. Since then Jenkins switched to a new credentials system rendering the old
+# method ineffective. We should switch to the new system or remove the code. After all, it is
+# easy enough to configure the credentials by hand.
+
+class Jenkins:
+ user = 'jenkins'
+ """
+    The name of the user account that Jenkins runs as. Note that we are not free to choose this
+    as it is determined by the jenkins package for Ubuntu.
+ """
+
+ group = 'nogroup'
+ """
+ The name of the group that Jenkins runs as.
+ """
+
+ data_device_ext = '/dev/sdf'
+ """
+ EC2's name of the block device to which to attach the Jenkins data volume
+ """
+
+ data_device_int = '/dev/xvdf'
+ """
+ The kernel's name of the block device to which to attach the Jenkins data volume
+ """
+
+ data_volume_name = 'jenkins-data'
+ """
+ The value of the Name tag of the Jenkins data volume
+ """
+
+ data_volume_fs_label = data_volume_name
+ """
+ The label of the file system on the Jenkins data volume
+ """
+
+ data_volume_size_gb = 100
+ """
+ The size of the Jenkins data volume
+ """
+
+ home = '/var/lib/jenkins'
+ """
+ The jenkins user's home directory on the build master
+ """
+
+
+jenkins = vars( Jenkins )
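+# Illustrative note (not in the original module): vars( ) exposes the class attributes
+# above as a plain mapping so they can be expanded into format strings, e.g.
+#
+#   >>> '{user} keeps its data in {home}'.format( **jenkins )
+#   'jenkins keeps its data in /var/lib/jenkins'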
+
+
+class JenkinsMaster( GenericUbuntuTrustyBox, SourceControlClient ):
+ """
+ An instance of this class represents the build master in EC2
+ """
+
+ def __init__( self, ctx ):
+ super( JenkinsMaster, self ).__init__( ctx )
+ self.volume = None
+
+ @classmethod
+ def recommended_instance_type( cls ):
+ return "m3.large"
+
+ def other_accounts( self ):
+ return super( JenkinsMaster, self ).other_accounts( ) + [ Jenkins.user ]
+
+ def default_account( self ):
+ return Jenkins.user
+
+ def prepare( self, *args, **kwargs ):
+ self.volume = EC2VolumeHelper( ec2=self.ctx.ec2,
+ name=self.ctx.to_aws_name( Jenkins.data_volume_name ),
+ size=Jenkins.data_volume_size_gb,
+ availability_zone=self.ctx.availability_zone )
+ return super( JenkinsMaster, self ).prepare( *args, **kwargs )
+
+ def _on_instance_running( self, first_boot ):
+ if first_boot:
+ self.volume.attach( self.instance_id, device=Jenkins.data_device_ext )
+ super( JenkinsMaster, self )._on_instance_running( first_boot )
+
+ @fabric_task
+ def _setup_package_repos( self ):
+ #
+ # Jenkins
+ #
+ super( JenkinsMaster, self )._setup_package_repos( )
+ run( "wget -q -O - 'http://pkg.jenkins-ci.org/debian/jenkins-ci.org.key' "
+ "| sudo apt-key add -" )
+ sudo( "echo deb http://pkg.jenkins-ci.org/debian binary/ "
+ "> /etc/apt/sources.list.d/jenkins.list" )
+ #
+ # Enable multiverse sources
+ #
+ sudo( 'apt-add-repository multiverse' )
+
+ def _list_packages_to_install( self ):
+ packages = super( JenkinsMaster, self )._list_packages_to_install( )
+ return packages + [
+ 'ec2-api-tools' ]
+
+ @fabric_task
+ def _install_packages( self, packages ):
+ super( JenkinsMaster, self )._install_packages( packages )
+ # work around https://issues.jenkins-ci.org/browse/JENKINS-20407
+ sudo( 'mkdir /var/run/jenkins' )
+ # Use confold so it doesn't get hung up on our pre-staged /etc/default/jenkins
+ sudo( 'apt-get -q -y -o Dpkg::Options::=--force-confold install jenkins' )
+
+ @fabric_task
+ def _pre_install_packages( self ):
+ #
+ # Pre-stage the defaults file for Jenkins. It differs from the maintainer's version in the
+ # following ways: (please document all changes in this comment)
+ #
+ # 1) cruft was removed
+ # 2) --httpListenAddress=127.0.0.1 was added to make Jenkins listen locally only
+ #
+ instance_type = self.instance.instance_type
+ etc_default_jenkins = StringIO( dedent( '''\
+ NAME=jenkins
+ JAVA=/usr/bin/java
+ JAVA_ARGS="-Xmx{jvm_heap_size}"
+ #JAVA_ARGS="-Djava.net.preferIPv4Stack=true" # make jenkins listen on IPv4 address
+ PIDFILE=/var/run/jenkins/jenkins.pid
+ JENKINS_USER={user}
+ JENKINS_WAR=/usr/share/jenkins/jenkins.war
+ JENKINS_HOME="{home}"
+ RUN_STANDALONE=true
+
+ # log location. this may be a syslog facility.priority
+ JENKINS_LOG=/var/log/jenkins/$NAME.log
+ #JENKINS_LOG=daemon.info
+
+ # See http://github.com/jenkinsci/jenkins/commit/2fb288474e980d0e7ff9c4a3b768874835a3e92e
+ MAXOPENFILES=8192
+
+ HTTP_PORT=8080
+ AJP_PORT=-1
+ JENKINS_ARGS="\\
+ --webroot=/var/cache/jenkins/war \\
+ --httpPort=$HTTP_PORT \\
+ --ajp13Port=$AJP_PORT \\
+ --httpListenAddress=127.0.0.1 \\
+ "
+ '''.format( jvm_heap_size='256m' if instance_type == 't1.micro' else '1G',
+ **jenkins ) ) )
+ put( etc_default_jenkins, '/etc/default/jenkins', use_sudo=True, mode=0644 )
+ sudo( 'chown root:root /etc/default/jenkins' )
+ #
+ # Prepare data volume if necessary
+ #
+ sudo( 'mkdir -p %s' % Jenkins.home )
+ # Only format empty volumes
+ if sudo( 'file -sL %s' % Jenkins.data_device_int ) == '%s: data' % Jenkins.data_device_int:
+ sudo( 'mkfs -t ext4 %s' % Jenkins.data_device_int )
+ sudo( 'e2label {data_device_int} {data_volume_fs_label}'.format( **jenkins ) )
+ else:
+ # if the volume is not empty, verify the file system label
+ label = sudo( 'e2label %s' % Jenkins.data_device_int )
+ if label != Jenkins.data_volume_fs_label:
+ raise AssertionError( "Unexpected volume label: '%s'" % label )
+
+ #
+ # Mount data volume permanently
+ #
+ sudo( "echo 'LABEL={data_volume_fs_label} {home} ext4 defaults 0 2' "
+ ">> /etc/fstab".format( **jenkins ) )
+ sudo( 'mount -a' )
+ # in case the UID is different on the volume
+ sudo( 'useradd -d {home} -g {group} -s /bin/bash {user}'.format( **jenkins ) )
+ sudo( 'chown -R {user} {home}'.format( **jenkins ) )
+
+ @classmethod
+ def ec2_keypair_name( cls, ctx ):
+ return Jenkins.user + '@' + ctx.to_aws_name( cls.role( ) )
+
+ @fabric_task( user=Jenkins.user )
+ def __create_jenkins_keypair( self ):
+ key_path = '%s/.ssh/id_rsa' % Jenkins.home
+ ec2_keypair_name = self.ec2_keypair_name( self.ctx )
+ ssh_privkey, ssh_pubkey = self._provide_generated_keypair( ec2_keypair_name, key_path )
+ with self.__patch_jenkins_config( ) as config:
+ text_by_xpath = { './/hudson.plugins.ec2.EC2Cloud/privateKey/privateKey': ssh_privkey }
+ for xpath, text in text_by_xpath.iteritems( ):
+ for element in config.iterfind( xpath ):
+ if element.text != text:
+ element.text = text
+
+ @fabric_task
+ def _post_install_packages( self ):
+ super( JenkinsMaster, self )._post_install_packages( )
+ self._propagate_authorized_keys( Jenkins.user, Jenkins.group )
+ self.setup_repo_host_keys( user=Jenkins.user )
+ self.__create_jenkins_keypair( )
+
+ def _ssh_args( self, user, command ):
+ # Add port forwarding to Jenkins' web UI
+ command = [ '-L', 'localhost:8080:localhost:8080' ] + command
+ return super( JenkinsMaster, self )._ssh_args( user, command )
+
+ @fabric_task( user=Jenkins.user )
+ def register_slaves( self, slave_clss, clean=False, instance_type=None ):
+ with self.__patch_jenkins_config( ) as config:
+ templates = config.find( './/hudson.plugins.ec2.EC2Cloud/templates' )
+ if templates is None:
+ raise UserError(
+ "Can't find any configuration for the Jenkins Amazon EC2 plugin. Make sure it is "
+ "installed and configured on the %s in %s." % (
+ self.role( ), self.ctx.namespace) )
+ template_element_name = 'hudson.plugins.ec2.SlaveTemplate'
+ if clean:
+ for old_template in templates.findall( template_element_name ):
+ templates.getchildren( ).remove( old_template )
+ for slave_cls in slave_clss:
+ slave = slave_cls( self.ctx )
+ images = slave.list_images( )
+ try:
+ image = images[ -1 ]
+ except IndexError:
+ raise UserError( "No images for '%s'" % slave_cls.role( ) )
+ new_template = slave.slave_config_template( image, instance_type )
+ description = new_template.find( 'description' ).text
+ found = False
+ for old_template in templates.findall( template_element_name ):
+ if old_template.find( 'description' ).text == description:
+ if found:
+ raise RuntimeError( 'More than one existing slave definition for %s. '
+ 'Fix and try again' % description )
+ i = templates.getchildren( ).index( old_template )
+ templates[ i ] = new_template
+ found = True
+ if not found:
+ templates.append( new_template )
+ # newer versions of Jenkins add class="empty-list" attribute if there are no templates
+ if templates.attrib.get( 'class' ) == 'empty-list':
+ templates.attrib.pop( 'class' )
+
+ def _image_block_device_mapping( self ):
+ # Do not include the data volume in the snapshot
+ bdm = self.instance.block_device_mapping
+ bdm[ Jenkins.data_device_ext ].no_device = True
+ return bdm
+
+ def _get_iam_ec2_role( self ):
+ iam_role_name, policies = super( JenkinsMaster, self )._get_iam_ec2_role( )
+ iam_role_name += '--' + abreviated_snake_case_class_name( JenkinsMaster )
+ policies.update( dict(
+ ec2_full=dict(
+ Version="2012-10-17",
+ Statement=[
+ # FIXME: Be more specific
+ dict( Effect="Allow", Resource="*", Action="ec2:*" ) ] ),
+ jenkins_master_iam_pass_role=dict(
+ Version="2012-10-17",
+ Statement=[
+ dict( Effect="Allow", Resource=self._role_arn( ), Action="iam:PassRole" ) ] ),
+ jenkins_master_s3=dict(
+ Version="2012-10-17",
+ Statement=[
+ dict( Effect="Allow", Resource="arn:aws:s3:::*", Action="s3:ListAllMyBuckets" ),
+ dict( Effect="Allow", Action="s3:*", Resource=[
+ "arn:aws:s3:::public-artifacts.cghub.ucsc.edu",
+ "arn:aws:s3:::public-artifacts.cghub.ucsc.edu/*" ] ) ] ) ) )
+ return iam_role_name, policies
+
+ @contextmanager
+ def __patch_jenkins_config( self ):
+ """
+ A context manager that retrieves the Jenkins configuration XML, deserializes it into an
+ XML ElementTree, yields the XML tree, then serializes the tree and saves it back to
+ Jenkins.
+ """
+ config_file = StringIO( )
+ if run( 'test -f ~/config.xml', quiet=True ).succeeded:
+ fresh_instance = False
+ get( remote_path='~/config.xml', local_path=config_file )
+ else:
+ # Get the in-memory config as the on-disk one may be absent on a fresh instance.
+ # Luckily, a fresh instance won't have any configured security.
+ fresh_instance = True
+ config_url = 'http://localhost:8080/computer/(master)/config.xml'
+ with hide( 'output' ):
+ config_file.write( run( 'curl "%s"' % config_url ) )
+ config_file.seek( 0 )
+ config = ElementTree.parse( config_file )
+
+ yield config
+
+ config_file.truncate( 0 )
+ config.write( config_file, encoding='utf-8', xml_declaration=True )
+ if fresh_instance:
+ self.__service_jenkins( 'stop' )
+ try:
+ put( local_path=config_file, remote_path='~/config.xml' )
+ finally:
+ if fresh_instance:
+ self.__service_jenkins( 'start' )
+ else:
+ log.warn( 'Visit the Jenkins web UI and click Manage Jenkins - Reload '
+ 'Configuration from Disk' )
+
+
+ @fabric_task
+ def __service_jenkins( self, command ):
+ sudo( 'service jenkins %s' % command )
diff --git a/jenkins/src/cgcloud/jenkins/jenkins_slave.py b/jenkins/src/cgcloud/jenkins/jenkins_slave.py
new file mode 100644
index 0000000..b1c3f57
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/jenkins_slave.py
@@ -0,0 +1,138 @@
+from bd2k.util.xml.builder import E
+from cgcloud.core.agent_box import AgentBox
+from cgcloud.lib.util import snake_to_camel, UserError
+from cgcloud.fabric.operations import sudo
+from cgcloud.core.box import fabric_task
+from cgcloud.core.source_control_client import SourceControlClient
+from cgcloud.jenkins.jenkins_master import Jenkins, JenkinsMaster
+
+build_dir = '/home/jenkins/builds'
+
+
+class JenkinsSlave( SourceControlClient, AgentBox ):
+ """
+ A box that represents EC2 instances which can serve as a Jenkins build agent. This class is
+ typically used as a mix-in.
+ """
+
+ def other_accounts( self ):
+ return super( JenkinsSlave, self ).other_accounts( ) + [ Jenkins.user ]
+
+ def default_account( self ):
+ return Jenkins.user
+
+ def _post_install_packages( self ):
+ super( JenkinsSlave, self )._post_install_packages( )
+ self._setup_build_user( )
+
+ # TODO: We should probably remove this and let the agent take care of it
+
+ def __get_master_pubkey( self ):
+ ec2_keypair_name = JenkinsMaster.ec2_keypair_name( self.ctx )
+ ec2_keypair = self.ctx.ec2.get_key_pair( ec2_keypair_name )
+ if ec2_keypair is None:
+ raise UserError( "Missing EC2 keypair named '%s'. You must create the master before "
+ "creating slaves." % ec2_keypair_name )
+ return self.ctx.download_ssh_pubkey( ec2_keypair )
+
+ def _populate_ec2_keypair_globs( self, ec2_keypair_globs ):
+ super( JenkinsSlave, self )._populate_ec2_keypair_globs( ec2_keypair_globs )
+ ec2_keypair_globs.append( JenkinsMaster.ec2_keypair_name( self.ctx ) )
+
+ @fabric_task
+ def _setup_build_user( self ):
+ """
+        Set up a user account that accepts SSH connections from Jenkins such that it can act as a
+ Jenkins slave.
+ """
+ kwargs = dict(
+ user=Jenkins.user,
+ dir=build_dir,
+ ephemeral=self._ephemeral_mount_point( 0 ),
+ pubkey=self.__get_master_pubkey( ).strip( ) )
+
+ # Create the build user
+ #
+ sudo( 'useradd -m -s /bin/bash {0}'.format( Jenkins.user ) )
+ self._propagate_authorized_keys( Jenkins.user )
+
+ # Ensure that jenkins at jenkins-master can log into this box as the build user
+ #
+ sudo( "echo '{pubkey}' >> ~/.ssh/authorized_keys".format( **kwargs ),
+ user=Jenkins.user,
+ sudo_args='-i' )
+
+ self.setup_repo_host_keys( user=Jenkins.user )
+
+ # Setup working directory for all builds in either the build user's home or as a symlink to
+ # the ephemeral volume if available. Remember, the ephemeral volume comes back empty every
+ # time the box starts.
+ #
+ if sudo( 'test -d {ephemeral}'.format( **kwargs ), quiet=True ).failed:
+ sudo( 'mkdir {ephemeral}'.format( **kwargs ) )
+ chown_cmd = "mount {ephemeral} || true ; chown -R {user}:{user} {ephemeral}".format(
+ **kwargs )
+ # chown ephemeral storage now ...
+ sudo( chown_cmd )
+ # ... and every time instance boots. Note that command must work when set -e is in effect.
+ self._register_init_command( chown_cmd )
+ # link build directory as symlink to ephemeral volume
+ sudo( 'ln -snf {ephemeral} {dir}'.format( **kwargs ),
+ user=Jenkins.user,
+ sudo_args='-i' )
+
+ def __jenkins_labels( self ):
+ labels = self.role( ).split( '-' )
+ return [ l for l in labels if l not in [ 'jenkins', 'slave' ] ]
+
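+    # Illustrative note (not in the original module): __jenkins_labels above derives the
+    # Jenkins node labels from the role name, e.g. a role named 'centos6-rpmbuild-jenkins-slave'
+    # would yield the labels [ 'centos6', 'rpmbuild' ].
+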
+ def slave_config_template( self, image, instance_type=None ):
+ """
+ Returns the slave template, i.e. a fragment of Jenkins configuration that,
+ if added to the master's main config file, controls how EC2 instances of this slave box
+ are created and managed by the master.
+
+ :param image: the image to boot slave instances from
+ :type image: boto.ec2.image.Image
+
+ :return: an XML element containing the slave template
+ :rtype: xml.etree.ElementTree.Element
+ """
+ if instance_type is None:
+ instance_type = self.recommended_instance_type( )
+ self._set_instance_options( image.tags )
+ spec = dict( instance_type=instance_type )
+ self._spec_block_device_mapping( spec, image )
+ return E( 'hudson.plugins.ec2.SlaveTemplate',
+ E.ami( image.id ),
+ # By convention we use the description element as the primary identifier. We
+ # don't need to use the absolute role name since we are not going to mix slaves
+ # from different namespaces:
+ E.description( self.role( ) ),
+ E.zone( self.ctx.availability_zone ),
+ E.securityGroups( self.ctx.to_aws_name( self._security_group_name( ) ) ),
+ E.remoteFS( build_dir ),
+ E.sshPort( '22' ),
+ E.type( snake_to_camel( instance_type, separator='.' ) ),
+ E.labels( ' '.join( self.__jenkins_labels( ) ) ),
+ E.mode( 'EXCLUSIVE' ),
+ E.initScript( 'while ! touch %s/.writable; do sleep 1; done' % build_dir ),
+ E.userData( spec.get( 'user_data', '' ) ),
+ E.numExecutors( '1' ),
+ E.remoteAdmin( Jenkins.user ),
+ # Using E.foo('') instead of just E.foo() yields <foo></foo> instead of <foo/>,
+ # consistent with how Jenkins serializes its config:
+ E.rootCommandPrefix( '' ),
+ E.jvmopts( '' ),
+ E.subnetId( '' ),
+ E.idleTerminationMinutes( '30' ),
+ E.iamInstanceProfile( self.get_instance_profile_arn( ) ),
+ E.useEphemeralDevices( 'true' ),
+ E.instanceCap( '1' ),
+ E.stopOnTerminate( 'false' ),
+ E.tags( *[
+ E( 'hudson.plugins.ec2.EC2Tag',
+ E.name( k ),
+ E.value( v ) )
+ for k, v in self._get_instance_options( ).iteritems( )
+ if v is not None ] ),
+ E.usePrivateDnsName( 'false' ) )
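+
+# Illustrative sketch (not part of the original module): serialized with
+# xml.etree.ElementTree.tostring, the fragment returned by slave_config_template looks
+# roughly like this (values are placeholders):
+#
+#   <hudson.plugins.ec2.SlaveTemplate>
+#     <ami>ami-1234abcd</ami>
+#     <description>toil-jenkins-slave</description>
+#     <zone>us-west-2a</zone>
+#     <remoteFS>/home/jenkins/builds</remoteFS>
+#     ...
+#   </hudson.plugins.ec2.SlaveTemplate>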
diff --git a/jenkins/src/cgcloud/jenkins/rpmbuild_jenkins_slaves.py b/jenkins/src/cgcloud/jenkins/rpmbuild_jenkins_slaves.py
new file mode 100644
index 0000000..deee9d6
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/rpmbuild_jenkins_slaves.py
@@ -0,0 +1,56 @@
+from cgcloud.core.box import fabric_task
+from cgcloud.core.centos_box import CentosBox
+from cgcloud.core.generic_boxes import GenericCentos5Box, GenericCentos6Box
+from cgcloud.fabric.operations import sudo
+
+from cgcloud.jenkins.jenkins_slave import JenkinsSlave
+
+
+class CentosRpmbuildJenkinsSlave( CentosBox, JenkinsSlave ):
+ """
+ Jenkins slave for building RPMs on CentOS
+ """
+
+ def _list_packages_to_install(self):
+ return super( CentosRpmbuildJenkinsSlave, self )._list_packages_to_install( ) + [
+ 'rpmdevtools',
+ 'tk-devel',
+ 'tcl-devel',
+ 'expat-devel',
+ 'db4-devel',
+ 'gdbm-devel',
+ 'sqlite-devel',
+ 'bzip2-devel',
+ 'openssl-devel',
+ 'ncurses-devel',
+ 'readline-devel',
+ # for building the Apache RPM:
+ 'mock',
+ 'apr-devel',
+ 'apr-util-devel',
+ 'pcre-devel',
+ # for OpenSSH RPM:
+ 'pam-devel'
+ ]
+
+ @fabric_task
+ def _setup_build_user(self):
+ super( CentosRpmbuildJenkinsSlave, self )._setup_build_user( )
+ # Some RPM builds depend on the product of other RPM builds to be installed so we need to
+ # be able to run rpm in between RPM builds
+ sudo( "echo 'Defaults:jenkins !requiretty' >> /etc/sudoers" )
+ sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /bin/rpm' >> /etc/sudoers" )
+ sudo( "useradd -s /sbin/nologin mockbuild" ) # goes with the mock package
+
+
+class Centos5RpmbuildJenkinsSlave(CentosRpmbuildJenkinsSlave, GenericCentos5Box):
+ """
+ Jenkins slave for building RPMs on CentOS 5
+ """
+ pass
+
+class Centos6RpmbuildJenkinsSlave(CentosRpmbuildJenkinsSlave, GenericCentos6Box):
+ """
+ Jenkins slave for building RPMs on CentOS 6
+ """
+ pass
diff --git a/jenkins/src/cgcloud/jenkins/s3am_jenkins_slave.py b/jenkins/src/cgcloud/jenkins/s3am_jenkins_slave.py
new file mode 100644
index 0000000..98940d7
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/s3am_jenkins_slave.py
@@ -0,0 +1,79 @@
+from cgcloud.core.ubuntu_box import Python27UpdateUbuntuBox
+from cgcloud.jenkins.generic_jenkins_slaves import UbuntuTrustyGenericJenkinsSlave
+from cgcloud.core.box import fabric_task
+from cgcloud.core.common_iam_policies import s3_full_policy
+from cgcloud.fabric.operations import remote_sudo_popen
+from cgcloud.lib.util import abreviated_snake_case_class_name, heredoc
+
+
+class S3amJenkinsSlave( UbuntuTrustyGenericJenkinsSlave, Python27UpdateUbuntuBox ):
+ """
+ Jenkins slave for running the S3AM build
+ """
+
+ @classmethod
+ def recommended_instance_type( cls ):
+ return "m4.xlarge"
+
+ def _list_packages_to_install( self ):
+ return super( S3amJenkinsSlave, self )._list_packages_to_install( ) + [
+ 'python-dev',
+ 'gcc', 'make', 'libcurl4-openssl-dev' # pycurl
+ ]
+
+ def _post_install_packages( self ):
+ super( S3amJenkinsSlave, self )._post_install_packages( )
+ self.__patch_asynchat( )
+
+ def _get_iam_ec2_role( self ):
+ iam_role_name, policies = super( S3amJenkinsSlave, self )._get_iam_ec2_role( )
+ iam_role_name += '--' + abreviated_snake_case_class_name( S3amJenkinsSlave )
+ policies.update( dict( s3_full=s3_full_policy ) )
+ return iam_role_name, policies
+
+ @fabric_task
+ def __patch_asynchat( self ):
+ """
+ This bites us in pyftpdlib during S3AM unit tests:
+
+ http://jenkins.cgcloud.info/job/s3am/13/testReport/junit/src.s3am.test.s3am_tests/CoreTests/test_copy/
+
+ The patch is from
+
+ https://hg.python.org/cpython/rev/d422062d7d36
+ http://bugs.python.org/issue16133
+ Fixed in 2.7.9: https://hg.python.org/cpython/raw-file/v2.7.9/Misc/NEWS
+ """
+        if self._remote_python_version( ) < (2, 7, 9):
+ with remote_sudo_popen( 'patch -d /usr/lib/python2.7 -p2' ) as patch:
+ patch.write( heredoc( '''
+ diff --git a/Lib/asynchat.py b/Lib/asynchat.py
+ --- a/Lib/asynchat.py
+ +++ b/Lib/asynchat.py
+ @@ -46,12 +46,17 @@ method) up to the terminator, and then c
+ you - by calling your self.found_terminator() method.
+ """
+
+ +import asyncore
+ +import errno
+ import socket
+ -import asyncore
+ from collections import deque
+ from sys import py3kwarning
+ from warnings import filterwarnings, catch_warnings
+
+ +_BLOCKING_IO_ERRORS = (errno.EAGAIN, errno.EALREADY, errno.EINPROGRESS,
+ + errno.EWOULDBLOCK)
+ +
+ +
+ class async_chat (asyncore.dispatcher):
+ """This is an abstract class. You must derive from this class, and add
+ the two methods collect_incoming_data() and found_terminator()"""
+ @@ -109,6 +114,8 @@ class async_chat (asyncore.dispatcher):
+ try:
+ data = self.recv (self.ac_in_buffer_size)
+ except socket.error, why:
+ + if why.args[0] in _BLOCKING_IO_ERRORS:
+ + return
+ self.handle_error()
+ return''' ) )
diff --git a/jenkins/src/cgcloud/jenkins/test/__init__.py b/jenkins/src/cgcloud/jenkins/test/__init__.py
new file mode 100644
index 0000000..f34c55b
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/test/__init__.py
@@ -0,0 +1 @@
+__author__ = 'hannes'
diff --git a/jenkins/src/cgcloud/jenkins/test/conftest.py b/jenkins/src/cgcloud/jenkins/test/conftest.py
new file mode 100644
index 0000000..862a5a2
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/test/conftest.py
@@ -0,0 +1 @@
+from cgcloud.core.test.conftest import pytest_configure
diff --git a/jenkins/src/cgcloud/jenkins/test/create_all_slaves.py b/jenkins/src/cgcloud/jenkins/test/create_all_slaves.py
new file mode 100644
index 0000000..42aa122
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/test/create_all_slaves.py
@@ -0,0 +1,338 @@
+from Queue import Queue
+from abc import ABCMeta, abstractmethod
+from functools import partial
+from threading import Thread
+import unittest
+import os
+import uuid
+import sys
+
+from bd2k.util.fnmatch import fnmatch
+
+try:
+ # Note that subprocess isn't thread-safe so subprocess is actually required. I'm just putting
+ # this in a try-except to make the test loader happy.
+ from subprocess32 import check_call, check_output
+except ImportError:
+ from subprocess import check_call, check_output
+
+
+# This is more of an experiment than a full-fledged test. It works on multiple EC2
+# instances in parallel, therefore making it well suited for semi-interactive use since you
+# don't have to wait as long for errors to show up. It runs all cgcloud invocations in tmux panes
+# inside a detached session. The tests print the tmux session ID so you can attach to it while
+# the test is running or afterwards for a post-mortem.
+#
+# Caveats: A successful test will leave the tmux session running. Each test creates a new
+# session so you should clean up once in a while. The easiest way to do so is to run 'tmux
+# kill-server'.
+
+# Must have tmux, a terminal multiplexer similar to GNU Screen, installed for this.
+
+# Subprocess32, a backport of the subprocess module from Python 3.2, must also be installed
+# (via pip). 2.7's stock subprocess keeps dead-locking on me.
+
+project_root = os.path.dirname( os.path.dirname( __file__ ) )
+cgcloud = 'cgcloud'
+
+production = True
+
+if production:
+ namespace = '/'
+ include_master = False
+else:
+ namespace = '/hannes/'
+ include_master = True
+
+
+class Pane( object ):
+ """
+ An abstraction of a tmux pane. A pane represents a terminal that you can run commands in.
+    Commands run asynchronously but you can synchronize on them using the result() method. You
+    should pre-allocate all panes you need before running commands in any of them. Commands are
+    run using the run() method. The result() method blocks until the command finishes. The tmux
+    pane remains open after the command finishes so you can do post-mortem analysis on it,
+    which is the main reason I wrote this.
+
+ All panes in the interpreter share a single tmux session. The session has only one window but
+ panes can be broken out manually after attaching to the session.
+ """
+
+ session = 'cgcloud-%s' % uuid.uuid4( )
+ panes = [ ]
+
+ def log( self, s ):
+ sys.stderr.write( s + '\n' )
+ sys.stderr.flush( )
+
+ def __init__( self ):
+ super( Pane, self ).__init__( )
+ # One tmux channel for success, one for failures. See tmux(1).
+ self.channel_ids = tuple( uuid.uuid4( ) for _ in range( 2 ) )
+ # A queue between the daemon threads that service the channels and the client code. The
+        # queue items are the channel index, 0 for failure, 1 for success.
+ self.queue = Queue( maxsize=1 )
+ # The pane index.
+ self.index = len( self.panes )
+ window = '%s:0' % self.session
+ if self.index == 0:
+ self.log( "Run 'tmux attach -t %s' to monitor output" % self.session )
+ check_call(
+ [ 'tmux', 'new-session', '-d', '-s', self.session, '-x', '100', '-y', '80' ] )
+ self.tmux_id = check_output(
+ [ 'tmux', 'list-panes', '-t', window, '-F', '#{pane_id}' ] ).strip( )
+ else:
+ self.tmux_id = check_output(
+ [ 'tmux', 'split-window', '-v', '-t', window, '-PF', '#{pane_id}' ] ).strip( )
+ check_call( [ 'tmux', 'select-layout', '-t', window, 'even-vertical' ] )
+ self.panes.append( self )
+ self.threads = tuple( self._start_thread( i ) for i in range( 2 ) )
+
+ def _start_thread( self, channel_index ):
+ thread = Thread( target=partial( self._wait, channel_index ) )
+ thread.daemon = True
+ thread.start( )
+ return thread
+
+ def _wait( self, channel_index ):
+ while True:
+ check_call( [ 'tmux', 'wait', str( self.channel_ids[ channel_index ] ) ] )
+ self.queue.put( channel_index )
+
+ def run( self, cmd, ignore_failure=False ):
+ fail_ch, success_ch = self.channel_ids
+ if ignore_failure:
+ cmd = '( %s ) ; tmux wait -S %s' % (cmd, success_ch)
+ else:
+ cmd = '( %s ) && tmux wait -S %s || tmux wait -S %s' % (cmd, success_ch, fail_ch)
+ check_call( [ 'tmux', 'send-keys', '-t', self.tmux_id, cmd, 'C-m' ] )
+
+ def result( self ):
+ return (False, True)[ self.queue.get( ) ]
+
+
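+# Illustrative sketch (not part of the original test): typical Pane usage is to allocate
+# all panes first, then run commands in them and synchronize on the results, e.g.
+#
+#   panes = [ Pane( ) for _ in range( 2 ) ]
+#   for pane in panes:
+#       pane.run( 'cgcloud list-roles' )
+#   assert all( pane.result( ) for pane in panes )
+
+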
+class Command( object ):
+ """
+ A glorified string template for cgcloud command lines. The default values for the template
+    arguments specified at construction can be overridden when the command is actually run,
+ i.e. when the template is instantiated. The value for a template parameter can be either a
+ static value or a callable taking two arguments, role and ordinal. The callable will be
+ evaluated at instantiation time with the role and ordinal of the concrete box cgcloud should
+ be run against. A command can be set to ignore failures, in which case a non-zero exit code
+ from cgcloud does not fail the test. A command can be 'reverse' which means that it should be
+ run against the list of boxes in the reverse order. How exactly "reverse" is implemented
+ depends on the client.
+ """
+
+ def __init__( self, command, template, ignore_failure=False, reverse=False, **template_args ):
+ super( Command, self ).__init__( )
+ self.template = "{cgcloud} {command} -n {namespace} " + template
+ self.template_args = template_args.copy( )
+ self.template_args.update( cgcloud=cgcloud, command=command, namespace=namespace )
+ self.ignore_failure = ignore_failure
+ self.reverse = reverse
+
+ def run( self, pane, role, ordinal, **template_args ):
+ """
+ Instantiate this command line template and run it in the specified pane against the box
+ of the specified role and ordinal, substituting additional template parameters with the
+ given keyword arguments.
+ """
+ # start with defaults
+ _template_args = self.template_args.copy( )
+ # update with overrides
+ _template_args.update( template_args )
+ # expand callables
+ _template_args = dict( (k, v( role, ordinal ) if callable( v ) else v)
+ for k, v in _template_args.iteritems( ) )
+ # set role and ordinal
+ _template_args.update( role=role, ordinal=ordinal )
+ # finally, run the command in the pane
+ pane.run( self.template.format( **_template_args ), ignore_failure=self.ignore_failure )
+
+
+# Factory methods for cgcloud commands:
+
+def create( options="" ):
+ return Command( "create", "--never-terminate {options} {role}", options=options )
+
+
+def recreate( options="" ):
+ return Command( "recreate", "--never-terminate {options} {role}", options=options )
+
+
+def start( options="" ):
+ return Command( "start", "-o {ordinal} {options} {role}", options=options )
+
+
+def stop( options="" ):
+ return Command( "stop", "-o {ordinal} {options} {role}", reverse=True, options=options )
+
+
+def ssh( ssh_command="", options="" ):
+ return Command( "ssh", "-o {ordinal} {options} {role} {ssh_command}",
+ ssh_command=ssh_command,
+ options=options )
+
+
+def rsync( rsync_args, options="" ):
+ return Command( "rsync", "-o {ordinal} {options} {role} {rsync_args}",
+ rsync_args=rsync_args,
+ options=options )
+
+
+def image( options="" ):
+ return Command( "image", "-o {ordinal} {options} {role}", options=options )
+
+
+def terminate( options="" ):
+ return Command( "terminate", "-o {ordinal} {options} {role}",
+ ignore_failure=True,
+ reverse=True,
+ options=options )
+
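+# Illustrative sketch (not part of the original test): template arguments may be callables
+# taking (role, ordinal) which are evaluated when the command is instantiated, e.g.
+#
+#   cmd = ssh( ssh_command=lambda role, ordinal: 'echo %s %s' % (role, ordinal) )
+#   cmd.run( Pane( ), 'load-test-box', ordinal=-1 )
+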
+
+class BaseTest( unittest.TestCase ):
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def _execute_command( self, command ):
+ pass
+
+ def _list_roles( self, slave_glob ):
+ slaves = [ slave
+ for slave in check_output( [ cgcloud, 'list-roles' ] ).split( '\n' )
+ if fnmatch( slave, slave_glob ) ]
+ return slaves
+
+ def _test( self, *commands ):
+ for command in commands:
+ self._execute_command( command )
+
+class DevEnvTest( BaseTest ):
+ """
+ Tests the creation of the Jenkins master and its slaves for continuous integration.
+ """
+ # slave_glob = '*-genetorrent-jenkins-slave'
+ # slave_glob = '*-generic-jenkins-slave'
+ # slave_glob = '*-rpmbuild-jenkins-slave'
+ slave_glob = 'centos5-*-jenkins-slave'
+
+ def _init_panes( self ):
+ slave_roles = self._list_roles( self.slave_glob )
+ self.master_pane = Pane( ) if include_master else None
+ self.slave_panes = dict( (slave_role, Pane( )) for slave_role in slave_roles )
+
+ def test_everything( self ):
+ self._init_panes( )
+ self._test(
+ create( ),
+ stop( ),
+ image( ),
+ start( ),
+ terminate( ),
+ recreate( ),
+ ssh( ),
+ terminate( ) )
+
+ def _execute_command( self, command ):
+ def test_master( ):
+ if self.master_pane is not None:
+ command.run( self.master_pane, 'jenkins-master', ordinal=-1 )
+ self.assertTrue( self.master_pane.result( ) )
+
+ def test_slaves( ):
+ for slave_role, pane in self.slave_panes.iteritems( ):
+ command.run( pane, slave_role, ordinal=-1 )
+ for pane in self.slave_panes.itervalues( ):
+ self.assertTrue( pane.result( ) )
+
+ tests = [ test_master, test_slaves ]
+
+ for test in reversed( tests ) if command.reverse else tests: test( )
+
+class LoadTest( BaseTest ):
+    key_file = '~/MORDOR1.pem' # local path, this will be copied to each box
+ role = 'load-test-box' # name of the cgcloud role
+ base_url = 'https://stage.cghub.ucsc.edu/cghub/data/analysis/download/'
+ instance_type = "m3.2xlarge"
+ if False:
+ uuids = [
+ "b08210ce-b0c1-4d6a-8762-0f981c27d692",
+ "ffb4cff4-06ea-4332-8002-9aff51d5d388",
+ "5c07378f-cafe-42db-a66e-d608f2f0e982",
+ "7fffef66-627f-43f7-96b3-6672e1cb6b59",
+ "7ec3fa29-bbec-4d08-839b-c1cd60909ed0",
+ "4714ee84-26cd-48e7-860d-a115af0fca48",
+ "9266e7ca-c6f9-4187-ab8b-f11f6c65bc71",
+ "9cd637b0-9b68-4fd7-bd9e-fa41e5329242",
+ "71ec0937-7812-4b35-87de-77174fdb28bc",
+ "d49add54-27d2-4d77-b719-19f4d77c10c3" ]
+ else:
+ uuids = [
+ "7c619bf2-6470-4e01-9391-1c5db775537e", # 166GBs
+ "27a1b0dc-3f1a-4606-9bd7-8b7a0a89e066", # 166GBs
+ "027d9b42-cf22-429a-9741-da6049a5f192", # 166GBs
+ "0600bae1-2d63-41fd-9dee-b5d3cd21b3ee", # 166GBs
+ "c3cf7d48-e0c1-4605-a951-34ad83916361", # 166GBs
+ # "4c87ef17-3d1b-478f-842f-4bb855abdda1", # 166GBs, unauthorized for MORDOR1.pem
+ "44806b1a-2d77-4b67-9774-67e8a5555f88", # 166GBs
+ "727e2955-67a3-431c-9c7c-547e6b8b7c95", # 166GBs
+ "99728596-1409-4d5e-b2dc-744b5ba2aeab", # 166GBs
+ # "c727c612-1be1-8c27-e040-ad451e414a7f" # >500GBs, causes 409 during download, maybe fixed now
+ ]
+ num_instances = len( uuids )
+ num_children = 8
+
+ def test_load( self ):
+ self._init_panes( )
+ self._test(
+ # recreate( "-t %s" % self.instance_type ),
+ # rsync( '-v %s :' % self.key_file ),
+ # ssh( self._gtdownload ),
+ terminate( '--quick' ),
+ )
+
+ def _gtdownload( self, role, ordinal ):
+ return "gtdownload -d {base_url}{uuid} -c {key_file} -vv --null-storage --max-children {num_children}".format(
+ base_url=self.base_url,
+ uuid=self.uuids[ ordinal ],
+ key_file=os.path.basename( self.key_file ),
+ num_children=self.num_children )
+
+ def _init_panes( self ):
+ self.panes = [ Pane( ) for _ in range( 0, self.num_instances ) ]
+
+ def _execute_command( self, command ):
+ for i, pane in enumerate( self.panes ):
+ command.run( pane, self.role, ordinal=(i - self.num_instances) )
+ for pane in self.panes:
+ self.assertTrue( pane.result( ) )
+
+class TrackerStressTest( BaseTest ):
+ role = 'load-test-box' # name of the cgcloud role
+ stress_tracker_script = '/Users/hannes/workspace/cghub/tests/stress_tracker'
+ instance_type = 'm3.2xlarge'
+ num_instances = 8
+
+ def test_tracker_stress( self ):
+ self._init_panes( )
+ self._test(
+ # recreate( '-t %s' % self.instance_type ),
+ # rsync( '-v %s :' % self.stress_tracker_script ),
+ # ssh( 'python %s' % os.path.basename( self.stress_tracker_script ) ),
+ terminate( '--quick' ),
+ )
+
+ def _init_panes( self ):
+ self.panes = [ Pane( ) for _ in range( 0, self.num_instances ) ]
+
+ def _execute_command( self, command ):
+ for i, pane in enumerate( self.panes ):
+ command.run( pane, self.role, ordinal=(i - self.num_instances) )
+ for pane in self.panes:
+ self.assertTrue( pane.result( ) )
+
+
+if __name__ == '__main__':
+ unittest.main( )
diff --git a/jenkins/src/cgcloud/jenkins/toil_jenkins_slave.py b/jenkins/src/cgcloud/jenkins/toil_jenkins_slave.py
new file mode 100644
index 0000000..e517189
--- /dev/null
+++ b/jenkins/src/cgcloud/jenkins/toil_jenkins_slave.py
@@ -0,0 +1,381 @@
+from StringIO import StringIO
+import time
+import re
+
+from fabric.operations import run, put
+from bd2k.util.strings import interpolate as fmt
+
+from cgcloud.core.apache import ApacheSoftwareBox
+from cgcloud.core.mesos_box import MesosBox
+from cgcloud.jenkins.cgcloud_jenkins_slave import CgcloudJenkinsSlave
+from cgcloud.jenkins.jenkins_master import Jenkins
+from cgcloud.core.box import fabric_task
+from cgcloud.core.common_iam_policies import s3_full_policy, sdb_full_policy
+from cgcloud.core.docker_box import DockerBox
+from cgcloud.fabric.operations import sudo, remote_sudo_popen, remote_open
+from cgcloud.lib.util import abreviated_snake_case_class_name, heredoc
+
+hadoop_version = '2.6.2'
+# The major version of Hadoop that the Spark binaries were built against
+spark_hadoop_version = '2.6'
+spark_version = '1.5.2'
+install_dir = '/opt'
+
+# Inherits CgcloudJenkinsSlave because the Toil integration tests invoke cgcloud to launch more
+# instances, similar to what the CGCloud tests do.
+
+class ToilJenkinsSlave( CgcloudJenkinsSlave,
+ DockerBox,
+ MesosBox,
+ ApacheSoftwareBox ):
+ """
+    Jenkins slave for running the Toil build and tests
+ """
+
+ @classmethod
+ def recommended_instance_type( cls ):
+ return "m3.large"
+
+ def _list_packages_to_install( self ):
+ return super( ToilJenkinsSlave, self )._list_packages_to_install( ) + [
+ 'python-dev', 'gcc', 'make',
+ 'libffi-dev', # pynacl -> toil, Azure client-side encryption
+ 'libcurl4-openssl-dev', # pycurl -> SPARQLWrapper -> rdflib>=4.2.0 -> cwltool -> toil
+ 'slurm-llnl', 'bc', # SLURM
+ ] + [ 'gridengine-' + p for p in ('common', 'master', 'client', 'exec') ]
+
+ def _get_debconf_selections( self ):
+ return super( ToilJenkinsSlave, self )._get_debconf_selections( ) + [
+ 'gridengine-master shared/gridenginemaster string localhost',
+ 'gridengine-master shared/gridenginecell string default',
+ 'gridengine-master shared/gridengineconfig boolean true' ]
+
+ def _post_install_packages( self ):
+ super( ToilJenkinsSlave, self )._post_install_packages( )
+ self.setup_repo_host_keys( )
+ self.__disable_mesos_daemons( )
+ self.__install_parasol( )
+ self.__patch_distutils( )
+ self.__configure_gridengine( )
+ self.__configure_slurm( )
+ self.__install_yarn( )
+ self.__install_spark( )
+
+ @fabric_task
+ def _setup_build_user( self ):
+ super( ToilJenkinsSlave, self )._setup_build_user( )
+ # Allow mount and umount such that Toil tests can use an isolated loopback filesystem for
+ # TMPDIR (and therefore Toil's work directory), thereby preventing the tracking of
+ # left-over files from being skewed by other activities on the ephemeral file system,
+ # like build logs, creation of .pyc files, etc.
+ for prog in ('mount', 'umount'):
+ sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /bin/%s' >> /etc/sudoers" % prog )
+
+ @fabric_task
+ def __disable_mesos_daemons( self ):
+ for daemon in ('master', 'slave'):
+ sudo( 'echo manual > /etc/init/mesos-%s.override' % daemon )
+
+ @fabric_task
+ def __install_parasol( self ):
+ run( "git clone https://github.com/BD2KGenomics/parasol-binaries.git" )
+ sudo( "cp parasol-binaries/* /usr/local/bin" )
+ run( "rm -rf parasol-binaries" )
+
+ @fabric_task
+    def __install_yarn( self ):
+ # Download and extract Hadoop
+ path = fmt( 'hadoop/common/hadoop-{hadoop_version}/hadoop-{hadoop_version}.tar.gz' )
+ self._install_apache_package( path, install_dir )
+
+ # patch path
+ with remote_open( '/etc/environment', use_sudo=True ) as f:
+ yarn_path = fmt( '{install_dir}/hadoop' )
+ self._patch_etc_environment( f, env_pairs=dict( HADOOP_HOME=yarn_path ) )
+
+
+ @fabric_task
+    def __install_spark( self ):
+ # Download and extract Spark
+ path = fmt( 'spark/spark-{spark_version}/spark-{spark_version}-bin-hadoop{spark_hadoop_version}.tgz' )
+ self._install_apache_package( path, install_dir )
+
+ # Patch paths
+ with remote_open( '/etc/environment', use_sudo=True ) as f:
+ spark_home = fmt( '{install_dir}/spark' )
+ # These two PYTHONPATH entries are also added by the 'pyspark' wrapper script.
+ # We need to replicate them globally because we want to be able to just do
+ # 'import pyspark' in Toil's Spark service code and associated tests.
+ python_path = [ fmt( '{spark_home}/python' ),
+                            run( fmt( 'ls {spark_home}/python/lib/py4j-*-src.zip' ) ).strip( ) ]
+ self._patch_etc_environment( f,
+ env_pairs=dict( SPARK_HOME=spark_home ),
+ dirs=python_path,
+ dirs_var='PYTHONPATH' )
+
+ def _pass_role_arn( self ):
+        # Very permissive, but it is needed to accommodate the appliance-based provisioning tests
+        # in Toil as they don't use CGCloud's concept of namespaces.
+ return 'arn:aws:iam::%s:role/*' % self.ctx.account
+
+ def _get_iam_ec2_role( self ):
+ iam_role_name, policies = super( ToilJenkinsSlave, self )._get_iam_ec2_role( )
+ iam_role_name += '--' + abreviated_snake_case_class_name( ToilJenkinsSlave )
+ policies.update( dict( s3_full=s3_full_policy, sdb_full=sdb_full_policy ) )
+ return iam_role_name, policies
+
+ @fabric_task
+ def __patch_distutils( self ):
+ """
+ https://hg.python.org/cpython/rev/cf70f030a744/
+ https://bitbucket.org/pypa/setuptools/issues/248/exit-code-is-zero-when-upload-fails
+ Fixed in 2.7.8: https://hg.python.org/cpython/raw-file/v2.7.8/Misc/NEWS
+ """
+ if self._remote_python_version( ) < (2, 7, 8):
+ with remote_sudo_popen( 'patch -d /usr/lib/python2.7 -p2' ) as patch:
+ patch.write( heredoc( """
+ --- a/Lib/distutils/command/upload.py
+ +++ b/Lib/distutils/command/upload.py
+ @@ -10,7 +10,7 @@ import urlparse
+ import cStringIO as StringIO
+ from hashlib import md5
+
+ -from distutils.errors import DistutilsOptionError
+ +from distutils.errors import DistutilsError, DistutilsOptionError
+ from distutils.core import PyPIRCCommand
+ from distutils.spawn import spawn
+ from distutils import log
+ @@ -181,7 +181,7 @@ class upload(PyPIRCCommand):
+ self.announce(msg, log.INFO)
+ except socket.error, e:
+ self.announce(str(e), log.ERROR)
+ - return
+ + raise
+ except HTTPError, e:
+ status = e.code
+ reason = e.msg
+ @@ -190,5 +190,6 @@ class upload(PyPIRCCommand):
+ self.announce('Server response (%s): %s' % (status, reason),
+ log.INFO)
+ else:
+ - self.announce('Upload failed (%s): %s' % (status, reason),
+ - log.ERROR)
+ + msg = 'Upload failed (%s): %s' % (status, reason)
+ + self.announce(msg, log.ERROR)
+ + raise DistutilsError(msg)""" ) )
+
+ @fabric_task
+ def __configure_gridengine( self ):
+ """
+        Configure the GridEngine daemons (master and exec) and create a default queue. Ensure
+ that the queue is updated to reflect the number of cores actually available.
+ """
+
+ ws = re.compile( r'\s+' )
+ nl = re.compile( r'[\r\n]+' )
+
+ def qconf( opt, **kwargs ):
+ return qconf_dict( opt, kwargs )
+
+ def qconf_dict( opt, d=None, file_name='qconf.tmp' ):
+ if d:
+                # qconf can't read from stdin for some reason; neither -, /dev/stdin nor /dev/fd/0 works
+ s = '\n'.join( ' '.join( i ) for i in d.iteritems( ) ) + '\n'
+ put( remote_path=file_name, local_path=StringIO( s ) )
+ sudo( ' '.join( [ 'qconf', opt, file_name ] ) )
+ run( ' '.join( [ 'rm', file_name ] ) )
+ else:
+ return dict( tuple( ws.split( l, 1 ) )
+ for l in nl.split( run( 'SGE_SINGLE_LINE=1 qconf ' + opt ) )
+ if l and not l.startswith( '#' ) )
+
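+        # Illustrative note (not in the original code): qconf_dict serializes keyword
+        # arguments into the flat 'key value' format that qconf expects, so a call like
+        #   qconf( '-Ae', hostname='localhost', user_lists='arusers' )
+        # uploads a temporary file containing lines such as
+        #   hostname localhost
+        #   user_lists arusers
+        # and then runs 'qconf -Ae qconf.tmp' via sudo on the box.
+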
+        # Add the Jenkins build user to the Sun Grid Engine cluster.
+ qconf( '-Auser', name=Jenkins.user, oticket='0', fshare='0', delete_time='0',
+ default_project='NONE' )
+
+ # Adds users to Sun Grid Engine user access lists (ACLs).
+ sudo( 'qconf -au %s arusers' % Jenkins.user )
+
+        # Add localhost to the list of hosts that are allowed to submit Sun Grid Engine jobs
+        # and control their behavior (i.e. submit hosts).
+ sudo( 'qconf -as localhost' )
+
+ # Remove all currently defined execution hosts
+ run( 'for i in `qconf -sel`; do sudo qconf -de $i ; done' )
+
+ # Add an execution host
+ qconf( '-Ae', hostname='localhost', load_scaling='NONE', complex_values='NONE',
+ user_lists='arusers', xuser_lists='NONE', projects='NONE', xprojects='NONE',
+ usage_scaling='NONE', report_variables='NONE' )
+
+ # Add a parallel environment
+ qconf( '-Ap', pe_name='smp', slots='999', user_lists='NONE', xuser_lists='NONE',
+ start_proc_args='/bin/true', stop_proc_args='/bin/true', allocation_rule='$pe_slots',
+ control_slaves='FALSE', job_is_first_task='TRUE', urgency_slots='min',
+ accounting_summary='FALSE' )
+
+ # Add a queue, the slots and processors will be adjusted dynamically, by an init script
+ qconf( '-Aq', qname='all.q', processors='1', slots='1', hostlist='localhost', seq_no='0',
+ load_thresholds='np_load_avg=1.75', suspend_thresholds='NONE', nsuspend='1',
+ suspend_interval='00:05:00', priority='0', min_cpu_interval='00:05:00',
+ qtype='BATCH INTERACTIVE', ckpt_list='NONE', pe_list='make smp', rerun='FALSE',
+ tmpdir='/tmp', shell='/bin/bash', prolog='NONE', epilog='NONE',
+ shell_start_mode='posix_compliant', starter_method='NONE', suspend_method='NONE',
+ resume_method='NONE', terminate_method='NONE', notify='00:00:60', owner_list='NONE',
+ user_lists='arusers', xuser_lists='NONE', subordinate_list='NONE',
+ complex_values='NONE', projects='NONE', xprojects='NONE', calendar='NONE',
+ initial_state='default', s_rt='INFINITY', h_rt='INFINITY', s_cpu='INFINITY',
+ h_cpu='INFINITY', s_fsize='INFINITY', h_fsize='INFINITY', s_data='INFINITY',
+ h_data='INFINITY', s_stack='INFINITY', h_stack='INFINITY', s_core='INFINITY',
+ h_core='INFINITY', s_rss='INFINITY', h_rss='INFINITY', s_vmem='INFINITY',
+ h_vmem='INFINITY' )
+
+ # Enable on-demand scheduling. This will eliminate the long time that jobs spend waiting
+ # in the qw state. There is no -Asconf so we have to fake it using -ssconf and -Msconf.
+ sconf = qconf( '-ssconf' )
+ sconf.update( dict( flush_submit_sec='1', flush_finish_sec='1',
+ schedule_interval='0:0:1' ) )
+ qconf_dict( '-Msconf', sconf )
+
+ # Enable immediate flushing of the accounting file. The SGE batch system in Toil uses the
+ # qacct program to determine the exit code of a finished job. The qacct program reads
+ # the accounting file. By default, this file is written to every 15 seconds which means
+ # that it may take up to 15 seconds before a finished job is seen by Toil. An
+ # accounting_flush_time value of 00:00:00 causes the accounting file to be flushed
+ # immediately, allowing qacct to report the status of finished jobs immediately. Again,
+ # there is no -Aconf, so we fake it with -sconf and -Mconf. Also, the file name has to be
+ # 'global'.
+ conf = qconf( '-sconf' )
+ params = dict( tuple( e.split( '=' ) ) for e in conf[ 'reporting_params' ].split( ' ' ) )
+ params[ 'accounting_flush_time' ] = '00:00:00'
+ conf[ 'reporting_params' ] = ' '.join( '='.join( e ) for e in params.iteritems( ) )
+ qconf_dict( '-Mconf', conf, file_name='global' )
+
+ # Register an init-script that ensures GridEngine uses localhost instead of hostname
+ path = '/var/lib/gridengine/default/common/'
+ self._register_init_script( 'gridengine-pre', heredoc( """
+ description "GridEngine pre-start configuration"
+ console log
+ start on filesystem
+ pre-start script
+ echo localhost > {path}/act_qmaster ; chown sgeadmin:sgeadmin {path}/act_qmaster
+ echo localhost `hostname -f` > {path}/host_aliases
+ end script""" ) )
+
+ # Register an init-script that adjust the queue config to reflect the number of cores
+ self._register_init_script( 'gridengine-post', heredoc( """
+ description "GridEngine post-start configuration"
+ console log
+ # I would rather depend on the gridengine daemons but don't know how as they are
+ # started by SysV init scripts. Supposedly the 'rc' job is run last.
+ start on started rc
+ pre-start script
+ cores=$(grep -c '^processor' /proc/cpuinfo)
+ qconf -mattr queue processors $cores `qselect`
+ qconf -mattr queue slots $cores `qselect`
+ end script""" ) )
+
+ # Run pre-start script
+ for daemon in ('exec', 'master'):
+ sudo( '/etc/init.d/gridengine-%s stop' % daemon )
+ sudo( "killall -9 -r 'sge_.*'", warn_only=True ) # the exec daemon likes to hang
+ self._run_init_script( 'gridengine-pre' )
+ for daemon in ('master', 'exec'):
+ sudo( '/etc/init.d/gridengine-%s start' % daemon )
+
+ # Run post-start script
+ self._run_init_script( 'gridengine-post' )
+ while 'execd is in unknown state' in run( 'qstat -f -q all.q -explain a', warn_only=True ):
+ time.sleep( 1 )
+
+ @fabric_task
+ def __configure_slurm( self ):
+ """
+        Configure SLURM in a single-node configuration with text-file accounting.
+ """
+ # Create munge key and start
+ sudo('/usr/sbin/create-munge-key')
+ sudo('/usr/sbin/service munge start')
+
+ slurm_acct_file = '/var/log/slurm-llnl/slurm-acct.txt'
+
+ # Default values placed into compute node config, will be replaced by pre script
+ slurm_conf = heredoc("""
+ ClusterName=jenkins-testing
+ ControlMachine=localhost
+ SlurmUser=slurm
+ SlurmctldPort=6817
+ SlurmdPort=6818
+ StateSaveLocation=/tmp
+ SlurmdSpoolDir=/tmp/slurmd
+ SwitchType=switch/none
+ MpiDefault=none
+ SlurmctldPidFile=/var/run/slurmctld.pid
+ SlurmdPidFile=/var/run/slurmd.pid
+ ProctrackType=proctrack/pgid
+ CacheGroups=0
+ ReturnToService=0
+ SlurmctldTimeout=300
+ SlurmdTimeout=300
+ InactiveLimit=0
+ MinJobAge=300
+ KillWait=30
+ Waittime=0
+ SchedulerType=sched/backfill
+ SelectType=select/cons_res
+ FastSchedule=1
+
+ # LOGGING
+ SlurmctldDebug=3
+ SlurmdDebug=3
+ JobCompType=jobcomp/none
+
+ # ACCOUNTING
+ AccountingStorageLoc={slurm_acct_file}
+ AccountingStorageType=accounting_storage/filetxt
+ AccountingStoreJobComment=YES
+ JobAcctGatherFrequency=30
+ JobAcctGatherType=jobacct_gather/linux
+
+ # COMPUTE NODES
+ NodeName=localhost CPUs=1 State=UNKNOWN RealMemory=256
+ PartitionName=debug Nodes=localhost Default=YES MaxTime=INFINITE State=UP
+ """)
+ slurm_conf_tmp = '/tmp/slurm.conf'
+ slurm_conf_file = '/etc/slurm-llnl/slurm.conf'
+ # Put config file in: /etc/slurm-llnl/slurm.conf
+ put( remote_path=slurm_conf_tmp, local_path=StringIO( slurm_conf ) )
+ sudo( 'mkdir -p /etc/slurm-llnl')
+ sudo( 'mv %s %s' % (slurm_conf_tmp, slurm_conf_file ) )
+ sudo('chown root:root %s' % slurm_conf_file )
+
+ # Touch the accounting job file and make sure it's owned by slurm user
+ sudo('mkdir -p /var/log/slurm-llnl')
+ sudo('touch %s' % slurm_acct_file)
+ sudo('chown slurm:slurm %s' % slurm_acct_file)
+ sudo('chmod 644 %s' % slurm_acct_file)
+
+ # Register an init-script that sets the CPUs and RealMemory in slurm.conf
+ # slurm.conf needs cpus and memory in order to handle jobs with these resource requests
+ self._register_init_script( 'slurm-llnl-pre', heredoc( """
+ description "Slurm pre-start configuration"
+ console log
+ start on filesystem
+ pre-start script
+ CPUS=$(/usr/bin/nproc)
+ MEMORY=$(cat /proc/meminfo | grep MemTotal | awk '{{print $2, "/ 1024"}}' | bc)
+ sed -i "s/CPUs=[0-9]\+/CPUs=${{CPUS}}/" {slurm_conf_file}
+ sed -i "s/RealMemory=[0-9]\+/RealMemory=${{MEMORY}}/" {slurm_conf_file}
+ end script""" ) )
+
+ # Start slurm services
+ self._run_init_script('slurm-llnl-pre')
+ self._run_init_script('slurm-llnl')
+
+ # Ensure partition is up
+ sudo('scontrol update NodeName=localhost State=Down')
+ sudo('scontrol update NodeName=localhost State=Resume')
+
+ def _docker_users( self ):
+ return super( ToilJenkinsSlave, self )._docker_users( ) + [ self.default_account( ) ]
diff --git a/lib/.gitignore b/lib/.gitignore
new file mode 100644
index 0000000..0ff20ef
--- /dev/null
+++ b/lib/.gitignore
@@ -0,0 +1,6 @@
+/build
+/dist
+*.egg-info
+*.pyc
+/MANIFEST.in
+/version.py
diff --git a/lib/setup.cfg b/lib/setup.cfg
new file mode 100644
index 0000000..11f789d
--- /dev/null
+++ b/lib/setup.cfg
@@ -0,0 +1,6 @@
+[pytest]
+# Look for any python file, the default of test_*.py wouldn't work for us
+python_files=*.py
+# Also run doctests
+addopts = --doctest-modules
+norecursedirs = cgcloud_Crypto
diff --git a/lib/setup.py b/lib/setup.py
new file mode 100644
index 0000000..c0d7680
--- /dev/null
+++ b/lib/setup.py
@@ -0,0 +1,21 @@
+from __future__ import absolute_import
+
+from setuptools import setup, find_packages
+
+from version import cgcloud_version, bd2k_python_lib_dep, boto_dep
+
+setup(
+ name='cgcloud-lib',
+ version=cgcloud_version,
+
+ author='Hannes Schmidt',
+ author_email='hannes at ucsc.edu',
+ url='https://github.com/BD2KGenomics/cgcloud',
+ description='Components shared between cgcloud-core and cgcloud-agent',
+
+ package_dir={ '': 'src' },
+ packages=find_packages( 'src' ),
+ namespace_packages=[ 'cgcloud' ],
+ install_requires=[
+ bd2k_python_lib_dep,
+ boto_dep ] )
diff --git a/lib/src/cgcloud/__init__.py b/lib/src/cgcloud/__init__.py
new file mode 100644
index 0000000..1148131
--- /dev/null
+++ b/lib/src/cgcloud/__init__.py
@@ -0,0 +1 @@
+__import__( 'pkg_resources' ).declare_namespace( __name__ )
\ No newline at end of file
diff --git a/lib/src/cgcloud/lib/__init__.py b/lib/src/cgcloud/lib/__init__.py
new file mode 100644
index 0000000..fd64ba4
--- /dev/null
+++ b/lib/src/cgcloud/lib/__init__.py
@@ -0,0 +1,5 @@
+from bd2k.util.d32 import standard as d32
+
+aws_d32 = d32
+
+test_namespace_suffix_length = 13
diff --git a/lib/src/cgcloud/lib/context.py b/lib/src/cgcloud/lib/context.py
new file mode 100644
index 0000000..180f615
--- /dev/null
+++ b/lib/src/cgcloud/lib/context.py
@@ -0,0 +1,864 @@
+# coding=utf-8
+import hashlib
+from contextlib import contextmanager
+import json
+import os
+import urllib
+import re
+import socket
+import itertools
+import logging
+
+from boto import ec2, iam, sns, sqs, vpc
+from boto.s3.key import Key as S3Key
+from boto.exception import S3ResponseError, BotoServerError
+from boto.s3.connection import S3Connection
+from boto.sqs.connection import SQSConnection
+from boto.sns.connection import SNSConnection
+from boto.vpc import VPCConnection
+from boto.iam.connection import IAMConnection
+from boto.ec2.keypair import KeyPair
+from bd2k.util import fnmatch
+from bd2k.util import memoize
+from boto.utils import get_instance_metadata
+
+from cgcloud.lib.message import Message
+from cgcloud.lib.util import ec2_keypair_fingerprint, UserError
+
+log = logging.getLogger( __name__ )
+
+
+class Context( object ):
+ """
+ Encapsulates all EC2-specific settings used by components in this project
+ """
+ availability_zone_re = re.compile( r'^([a-z]{2}-[a-z]+-[1-9][0-9]*)([a-z])$' )
+
+ name_prefix_re = re.compile( r'^(/([0-9a-z][0-9a-z._-]*))*' )
+ name_re = re.compile( name_prefix_re.pattern + '/?$' )
+ namespace_re = re.compile( name_prefix_re.pattern + '/$' )
+
+ def __init__( self, availability_zone, namespace ):
+ """
+        Create a Context object.
+
+ :param availability_zone: The availability zone to place EC2 resources like volumes and
+ instances into. The AWS region to operate in is implied by this parameter since the
+ region is a prefix of the availability zone string
+
+        :param namespace: The prefix for names of EC2 resources. The namespace is a string
+        starting with '/' followed by zero or more components, separated by '/'. Components are
+        non-empty strings consisting only of alphanumeric characters, '.', '-' or '_', and must
+        not start with '_'. The namespace argument will be encoded as ASCII. Unicode strings
+        that can't be encoded as ASCII will be rejected.
+
+        A note about our namespaces vs IAM's resource paths: IAM paths don't provide namespace
+        isolation. In other words, it is not possible to have two users of the same name in two
+        different paths; the name by itself has to be unique. For that reason, IAM resource paths
+        are pretty much useless.
+
+ >>> ctx = Context( 'us-west-1b', None )
+ Traceback (most recent call last):
+ ....
+ ValueError: Need namespace
+
+ >>> Context('us-west-1b', namespace='/').namespace
+ '/'
+
+ >>> Context('us-west-1b', namespace='/foo/').namespace
+ '/foo/'
+
+ >>> Context('us-west-1b', namespace='/foo/bar/').namespace
+ '/foo/bar/'
+
+ >>> Context('us-west-1b', namespace='')
+ Traceback (most recent call last):
+ ....
+ ValueError: Invalid namespace ''
+
+ >>> Context('us-west-1b', namespace='foo')
+ Traceback (most recent call last):
+ ....
+ ValueError: Invalid namespace 'foo'
+
+ >>> Context('us-west-1b', namespace='/foo')
+ Traceback (most recent call last):
+ ....
+ ValueError: Invalid namespace '/foo'
+
+ >>> Context('us-west-1b', namespace='//foo/')
+ Traceback (most recent call last):
+ ....
+ ValueError: Invalid namespace '//foo/'
+
+ >>> Context('us-west-1b', namespace='/foo//')
+ Traceback (most recent call last):
+ ....
+ ValueError: Invalid namespace '/foo//'
+
+ >>> Context('us-west-1b', namespace='han//nes')
+ Traceback (most recent call last):
+ ....
+ ValueError: Invalid namespace 'han//nes'
+
+ >>> Context('us-west-1b', namespace='/_foo/')
+ Traceback (most recent call last):
+ ....
+ ValueError: Invalid namespace '/_foo/'
+
+ >>> Context('us-west-1b', namespace=u'/foo/').namespace
+ '/foo/'
+
+ >>> Context('us-west-1b', namespace=u'/föo/').namespace
+ Traceback (most recent call last):
+ ....
+ ValueError: 'ascii' codec can't encode characters in position 2-3: ordinal not in range(128)
+
+ >>> import string
+ >>> component = string.ascii_lowercase + string.digits + '-_.'
+ >>> namespace = '/' + component + '/'
+ >>> Context('us-west-1b', namespace=namespace).namespace == namespace
+ True
+ """
+ super( Context, self ).__init__( )
+
+ self.__iam = None
+ self.__vpc = None
+ self.__s3 = None
+ self.__sns = None
+ self.__sqs = None
+
+ self.availability_zone = availability_zone
+ m = self.availability_zone_re.match( availability_zone )
+ if not m:
+ raise ValueError( "Can't extract region from availability zone '%s'"
+ % availability_zone )
+ self.region = m.group( 1 )
+
+ if namespace is None:
+ raise ValueError( 'Need namespace' )
+ try:
+ namespace = namespace.encode( 'ascii' )
+ except UnicodeEncodeError as e:
+ raise ValueError( e )
+
+ namespace = self.resolve_me( namespace )
+
+ if not re.match( self.namespace_re, namespace ):
+ raise ValueError( "Invalid namespace '%s'" % namespace )
+
+ self.namespace = namespace
+
+ @property
+ def iam( self ):
+ """
+ :rtype: IAMConnection
+ """
+ if self.__iam is None:
+ self.__iam = self.__aws_connect( iam, 'universal' )
+ return self.__iam
+
+ # VPCConnection extends EC2Connection so we can use one instance of the former for both
+
+ @property
+ def vpc( self ):
+ """
+ :rtype: VPCConnection
+ """
+ if self.__vpc is None:
+ self.__vpc = self.__aws_connect( vpc )
+ return self.__vpc
+
+ # ec2 = vpc works, too, but confuses the type hinter in PyCharm
+
+ @property
+ def ec2( self ):
+ """
+ :rtype: VPCConnection
+ """
+ return self.vpc
+
+ @property
+ def s3( self ):
+ """
+ :rtype: S3Connection
+ """
+ if self.__s3 is None:
+ # We let S3 route buckets to regions for us. If we connected to a specific region,
+ # bucket lookups (HEAD request against bucket URL) would fail with 301 status but
+ # without a Location header.
+ self.__s3 = S3Connection( )
+ return self.__s3
+
+ @property
+ def sns( self ):
+ """
+ :rtype: SNSConnection
+ """
+ if self.__sns is None:
+ self.__sns = self.__aws_connect( sns )
+ return self.__sns
+
+ @property
+ def sqs( self ):
+ """
+ :rtype: SQSConnection
+ """
+ if self.__sqs is None:
+ self.__sqs = self.__aws_connect( sqs )
+ return self.__sqs
+
+ def __aws_connect( self, aws_module, region=None, **kwargs ):
+ if region is None:
+ region = self.region
+ conn = aws_module.connect_to_region( region, **kwargs )
+ if conn is None:
+ raise RuntimeError( "%s couldn't connect to region %s" % (
+ aws_module.__name__, region) )
+ return conn
+
+ def __enter__( self ):
+ return self
+
+ def __exit__( self, exc_type, exc_val, exc_tb ):
+ self.close( )
+
+ def close( self ):
+ if self.__vpc is not None: self.__vpc.close( )
+ if self.__s3 is not None: self.__s3.close( )
+ if self.__iam is not None: self.__iam.close( )
+ if self.__sns is not None: self.__sns.close( )
+ if self.__sqs is not None: self.__sqs.close( )
+
+ @staticmethod
+ def is_absolute_name( name ):
+ """
+ Returns True if the given name starts with a namespace.
+ """
+ return name[ 0:1 ] == '/'
+
+ class InvalidPathError( ValueError ):
+ def __init__( self, invalid_path ):
+ super( Context.InvalidPathError, self ).__init__( "Invalid path '%s'" % invalid_path )
+
+ def absolute_name( self, name ):
+ """
+ Returns the absolute form of the specified resource name. If the specified name is
+ already absolute, that name will be returned unchanged, otherwise the given name will be
+ prefixed with the namespace this object was configured with.
+
+ Relative names starting with underscores are disallowed.
+
+ >>> ctx = Context( 'us-west-1b', namespace='/' )
+ >>> ctx.absolute_name('bar')
+ '/bar'
+ >>> ctx.absolute_name('/bar')
+ '/bar'
+ >>> ctx.absolute_name('')
+ '/'
+ >>> ctx.absolute_name('/')
+ '/'
+ >>> ctx.absolute_name('_bar')
+ Traceback (most recent call last):
+ ....
+ InvalidPathError: Invalid path '/_bar'
+ >>> ctx.absolute_name('/_bar')
+ Traceback (most recent call last):
+ ....
+ InvalidPathError: Invalid path '/_bar'
+
+ >>> ctx = Context( 'us-west-1b', namespace='/foo/' )
+ >>> ctx.absolute_name('bar')
+ '/foo/bar'
+ >>> ctx.absolute_name('bar/')
+ '/foo/bar/'
+ >>> ctx.absolute_name('bar1/bar2')
+ '/foo/bar1/bar2'
+ >>> ctx.absolute_name('/bar')
+ '/bar'
+ >>> ctx.absolute_name('')
+ '/foo/'
+ >>> ctx.absolute_name('/')
+ '/'
+ >>> ctx.absolute_name('_bar')
+ Traceback (most recent call last):
+ ....
+ InvalidPathError: Invalid path '/foo/_bar'
+ >>> ctx.absolute_name('/_bar')
+ Traceback (most recent call last):
+ ....
+ InvalidPathError: Invalid path '/_bar'
+ """
+ if self.is_absolute_name( name ):
+ result = name
+ else:
+ result = self.namespace + name
+ if not self.name_re.match( result ):
+ raise self.InvalidPathError( result )
+ return result
+
+ def to_aws_name( self, name ):
+ """
+ Returns a transliteration of the name that is safe to use for resource names on AWS. If the
+ given name is relative, it is converted to its absolute form before the transliteration.
+
+ The transliteration uses two consecutive '_' to encode a single '_' and a single '_' to
+ separate the name components. AWS-safe names are by definition absolute such that the
+ leading separator can be removed. This leads to fairly readable AWS-safe names,
+ especially for names in the root namespace, where the transliteration is the identity
+ function if the input does not contain any '_'.
+
+ This scheme only works if name components don't start with '_'. Without that condition,
+ '/_' would become '___', the inverse of which is '_/'.
+
+ >>> ctx = Context( 'us-west-1b', namespace='/' )
+
+ >>> ctx.to_aws_name( 'foo' )
+ 'foo'
+ >>> ctx.from_aws_name( 'foo' )
+ 'foo'
+
+ Illegal paths that would introduce ambiguity need to raise an exception
+ >>> ctx.to_aws_name('/_')
+ Traceback (most recent call last):
+ ....
+ InvalidPathError: Invalid path '/_'
+ >>> ctx.to_aws_name('/_/')
+ Traceback (most recent call last):
+ ....
+ InvalidPathError: Invalid path '/_/'
+ >>> ctx.from_aws_name('___')
+ Traceback (most recent call last):
+ ....
+ InvalidPathError: Invalid path '/_/'
+
+ >>> ctx.to_aws_name( 'foo_bar')
+ 'foo__bar'
+ >>> ctx.from_aws_name( 'foo__bar')
+ 'foo_bar'
+
+ >>> ctx.to_aws_name( '/sub_ns/foo_bar')
+ 'sub__ns_foo__bar'
+ >>> ctx.to_aws_name( 'sub_ns/foo_bar')
+ 'sub__ns_foo__bar'
+ >>> ctx.from_aws_name( 'sub__ns_foo__bar' )
+ 'sub_ns/foo_bar'
+
+ >>> ctx.to_aws_name( 'g_/' )
+ 'g___'
+ >>> ctx.from_aws_name( 'g___' )
+ 'g_/'
+
+ >>> ctx = Context( 'us-west-1b', namespace='/this_ns/' )
+
+ >>> ctx.to_aws_name( 'foo' )
+ 'this__ns_foo'
+ >>> ctx.from_aws_name( 'this__ns_foo' )
+ 'foo'
+
+ >>> ctx.to_aws_name( 'foo_bar')
+ 'this__ns_foo__bar'
+ >>> ctx.from_aws_name( 'this__ns_foo__bar')
+ 'foo_bar'
+
+ >>> ctx.to_aws_name( '/other_ns/foo_bar' )
+ 'other__ns_foo__bar'
+ >>> ctx.from_aws_name( 'other__ns_foo__bar' )
+ '/other_ns/foo_bar'
+
+ >>> ctx.to_aws_name( 'other_ns/foo_bar' )
+ 'this__ns_other__ns_foo__bar'
+ >>> ctx.from_aws_name( 'this__ns_other__ns_foo__bar' )
+ 'other_ns/foo_bar'
+
+ >>> ctx.to_aws_name( '/this_ns/foo_bar' )
+ 'this__ns_foo__bar'
+ >>> ctx.from_aws_name( 'this__ns_foo__bar' )
+ 'foo_bar'
+ """
+ name = self.absolute_name( name )
+ assert name.startswith( '/' )
+ return name[ 1: ].replace( '_', '__' ).replace( '/', '_' )
+
+ def from_aws_name( self, name ):
+ """
+ The inverse of to_aws_name(), except that the namespace is stripped from the input if it
+ is relative to this context's namespace.
+
+ >>> zone = 'us-west-1b'
+ >>> Context( zone, namespace='/foo/' ).from_aws_name('bar__x')
+ '/bar_x'
+ >>> Context( zone, namespace='/foo_x/' ).from_aws_name('foo__x_bar')
+ 'bar'
+ >>> Context( zone, namespace='/' ).from_aws_name('foo__x_bar__x')
+ 'foo_x/bar_x'
+ >>> Context( zone, namespace='/bla/' ).from_aws_name('foo__x_bar__x')
+ '/foo_x/bar_x'
+ """
+ name = '_'.join( s.replace( '_', '/' ) for s in name.split( '__' ) )
+ name = '/' + name
+ if not self.name_re.match( name ):
+ raise self.InvalidPathError( name )
+ if name.startswith( self.namespace ):
+ name = name[ len( self.namespace ): ]
+ return name
+
+ def base_name( self, name ):
+ """
+ Return the last component of a name, absolute or relative.
+
+ >>> ctx = Context( 'us-west-1b', namespace='/foo/bar/')
+ >>> ctx.base_name('')
+ ''
+ >>> ctx.base_name('/')
+ ''
+ >>> ctx.base_name('/a')
+ 'a'
+ >>> ctx.base_name('/a/')
+ ''
+ >>> ctx.base_name('/a/b')
+ 'b'
+ >>> ctx.base_name('/a/b/')
+ ''
+ """
+ return name.split( '/' )[ -1 ]
+
+ def contains_name( self, name ):
+ return not self.is_absolute_name( name ) or name.startswith( self.namespace )
+
+ def contains_aws_name( self, aws_name ):
+ """
+ >>> def c(n): return Context( 'us-west-1b', namespace=n)
+ >>> c('/foo/' ).contains_aws_name('bar_x')
+ False
+ >>> c('/foo/' ).contains_aws_name('foo_x')
+ True
+ >>> c('/foo/' ).contains_aws_name('foo_bar_x')
+ True
+ >>> c('/foo/' ).contains_aws_name('bar_foo_x')
+ False
+ >>> c('/' ).contains_aws_name('bar_x')
+ True
+ >>> c('/' ).contains_aws_name('foo_x')
+ True
+ >>> c('/' ).contains_aws_name('foo_bar_x')
+ True
+ >>> c('/' ).contains_aws_name('bar_foo_x')
+ True
+ """
+ return self.contains_name( self.from_aws_name( aws_name ) )
+
+ def try_contains_aws_name( self, aws_name ):
+ try:
+ return self.contains_aws_name( aws_name )
+ except self.InvalidPathError:
+ return False
+
+ @property
+ @memoize
+ def account( self ):
+ try:
+ arn = self.iam.get_user( ).arn
+ except:
+ # Agent boxes run with IAM role credentials instead of user credentials.
+ arn = get_instance_metadata( )[ 'iam' ][ 'info' ][ 'InstanceProfileArn' ]
+ _, partition, service, region, account, resource = arn.split( ':', 6 )
+ return account
+
+ @property
+ @memoize
+ def s3_bucket_name( self ):
+ return self.account + '-cgcloud'
+
+ ssh_pubkey_s3_key_prefix = 'ssh_pubkey:'
+
+ @property
+ def s3_location( self ):
+ if self.region == 'us-east-1':
+ return ''
+ else:
+ return self.region
+
+ def upload_ssh_pubkey( self, ssh_pubkey, fingerprint ):
+ bucket = self.s3.lookup( self.s3_bucket_name )
+ if bucket is None:
+ bucket = self.s3.create_bucket( self.s3_bucket_name,
+ location=self.s3_location )
+ s3_entry = S3Key( bucket )
+ s3_entry.key = self.ssh_pubkey_s3_key_prefix + fingerprint
+ s3_entry.set_contents_from_string( ssh_pubkey )
+
+ def register_ssh_pubkey( self, ec2_keypair_name, ssh_pubkey, force=False ):
+ """
+ Import the given OpenSSH public key as a 'key pair' into EC2.
+
+ There is no way to get to the actual public key once it has been imported to EC2.
+ OpenStack lets you do that and I don't see why Amazon decided to omit this functionality.
+ To work around this, we store the public key in S3, identified by the public key's
+ fingerprint. As long as we always check the fingerprint of the downloaded public SSH key
+ against that of the EC2 keypair key, this method is resilient against malicious
+ modifications of the keys stored in S3.
+
+ :param ec2_keypair_name: the desired name of the EC2 key pair
+
+ :param ssh_pubkey: the SSH public key in OpenSSH's native format, i.e. the format used in
+ ~/.ssh/authorized_keys
+
+ :param force: overwrite existing EC2 keypair of the given name
+ """
+ fingerprint = ec2_keypair_fingerprint( ssh_pubkey, reject_private_keys=True )
+ ec2_keypair = self.ec2.get_key_pair( ec2_keypair_name )
+ if ec2_keypair is not None:
+ if ec2_keypair.name != ec2_keypair_name:
+ raise AssertionError( "Key pair names don't match." )
+ if ec2_keypair.fingerprint != fingerprint:
+ if force:
+ self.ec2.delete_key_pair( ec2_keypair_name )
+ ec2_keypair = None
+ else:
+ raise UserError(
+ "Key pair %s already exists in EC2, but its fingerprint %s is "
+ "different from the fingerprint %s of the key to be imported. Use "
+ "the force option to overwrite the existing key pair." %
+ (ec2_keypair.name, ec2_keypair.fingerprint, fingerprint) )
+
+ if ec2_keypair is None:
+ ec2_keypair = self.ec2.import_key_pair( ec2_keypair_name, ssh_pubkey )
+ assert ec2_keypair.fingerprint == fingerprint
+
+ self.upload_ssh_pubkey( ssh_pubkey, fingerprint )
+ self.__publish_key_update_agent_message( )
+ return ec2_keypair
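+
+ # A minimal usage sketch (not from the original source): assuming valid AWS credentials
+ # and an existing OpenSSH public key file, registering and retrieving the key could look
+ # like this; the key pair name 'jdoe-laptop' is purely illustrative:
+ #
+ #   ctx = Context( 'us-west-1b', namespace='/' )
+ #   with open( os.path.expanduser( '~/.ssh/id_rsa.pub' ) ) as f:
+ #       ssh_pubkey = f.read( )
+ #   keypair = ctx.register_ssh_pubkey( 'jdoe-laptop', ssh_pubkey )
+ #   assert ctx.download_ssh_pubkey( keypair ) == ssh_pubkey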
+
+ def expand_keypair_globs( self, globs ):
+ """
+ Returns a list of EC2 key pair objects matching the specified globs. The order of the
+ objects in the returned list will be consistent with the order of the globs and it will
+ not contain any elements more than once. In other words, the returned list will start
+ with all key pairs matching the first glob, followed by key pairs matching the second
+ glob but not the first glob and so on.
+
+ :rtype: list of KeyPair
+ """
+
+ def iam_lookup( glob ):
+ if glob.startswith( '@@' ):
+ return (_.user_name for _ in self.iam.get_group( 'developers' ).users)
+ elif glob.startswith( '@' ):
+ return (self.iam.get_user( glob[ 1: ] ).user_name,)
+ else:
+ return (glob,)
+
+ globs = itertools.chain.from_iterable( map( iam_lookup, globs ) )
+
+ result = [ ]
+ keypairs = dict( (keypair.name, keypair) for keypair in self.ec2.get_all_key_pairs( ) )
+ for glob in globs:
+ i = len( result )
+ for name, keypair in keypairs.iteritems( ):
+ if fnmatch.fnmatch( name, glob ):
+ result.append( keypair )
+
+ # since we can't modify the set during iteration
+ for keypair in result[ i: ]:
+ keypairs.pop( keypair.name )
+ return result
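+
+ # For example (an illustrative sketch, not part of the original docstring):
+ # expand_keypair_globs( [ '@@', 'jenkins*' ] ) would first return the key pairs named
+ # after the members of the 'developers' IAM group, followed by any remaining key pairs
+ # whose names start with 'jenkins'.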
+
+ def download_ssh_pubkey( self, ec2_keypair ):
+ try:
+ bucket = self.s3.get_bucket( self.s3_bucket_name )
+ s3_entry = S3Key( bucket )
+ s3_entry.key = self.ssh_pubkey_s3_key_prefix + ec2_keypair.fingerprint
+ ssh_pubkey = s3_entry.get_contents_as_string( )
+ except S3ResponseError as e:
+ if e.status == 404:
+ raise UserError(
+ "There is no matching SSH pub key stored in S3 for EC2 key pair %s. Has "
+ "it been registered, e.g using the cgcloud's register-key command?" %
+ ec2_keypair.name )
+ else:
+ raise
+ fingerprint_len = len( ec2_keypair.fingerprint.split( ':' ) )
+ if fingerprint_len == 20: # 160 bit SHA-1
+ # The fingerprint is that of a private key. We can't get at the private key so we
+ # can't verify the public key either. So this is inherently insecure. However,
+ # remember that the only reason why we are dealing with an EC2-generated private
+ # key is that Jenkins' EC2 plugin expects a 20-byte fingerprint. See
+ # https://issues.jenkins-ci.org/browse/JENKINS-20142 for details. Once that issue
+ # is fixed, we can switch back to just using imported keys and 16-byte fingerprints.
+ pass
+ elif fingerprint_len == 16: # 128 bit MD5
+ fingerprint = ec2_keypair_fingerprint( ssh_pubkey )
+ if ec2_keypair.fingerprint != fingerprint:
+ raise UserError(
+ "Fingerprint mismatch for key %s! Expected %s but got %s. The EC2 keypair "
+ "doesn't match the public key stored in S3." %
+ (ec2_keypair.name, ec2_keypair.fingerprint, fingerprint) )
+ return ssh_pubkey
+
+ @property
+ @memoize
+ def iam_user_name( self ):
+ try:
+ return self.iam.get_user( ).user_name
+ except:
+ log.warn( "IAMConnection.get_user() failed.", exc_info=True )
+ return None
+
+ current_user_placeholder = '__me__'
+
+ @staticmethod
+ def drop_hostname( email ):
+ """
+ >>> Context.drop_hostname("foo")
+ 'foo'
+ >>> Context.drop_hostname("foo at bar.com")
+ 'foo'
+ >>> Context.drop_hostname("")
+ ''
+ >>> Context.drop_hostname("@")
+ ''
+ """
+ try:
+ n = email.index( "@" )
+ except ValueError:
+ return email
+ else:
+ return email[ 0:n ]
+
+ def resolve_me( self, s, drop_hostname=True ):
+ placeholder = self.current_user_placeholder
+ if placeholder in s:
+ try:
+ me = os.environ[ 'CGCLOUD_ME' ]
+ except KeyError:
+ me = self.iam_user_name
+ if not me:
+ raise UserError(
+ "Can't determine current IAM user name. Be sure to put valid AWS credentials "
+ "in ~/.boto or ~/.aws/credentials. For details, refer to %s. On an EC2 "
+ "instance that is authorized via IAM roles, you can set the CGCLOUD_ME "
+ "environment variable (uncommon)." %
+ 'http://boto.readthedocs.org/en/latest/boto_config_tut.html' )
+ if drop_hostname:
+ me = self.drop_hostname( me )
+ me = me.lower() # namespaces must be lower case
+ return s.replace( placeholder, me )
+ else:
+ return s
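+
+ # For example, with an IAM user named 'Jane@example.com' (an illustrative name),
+ # resolve_me( '/__me__/' ) would yield '/jane/' since the host part is dropped and the
+ # result is lower-cased.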
+
+ def setup_iam_ec2_role( self, role_name, policies ):
+ aws_role_name = self.to_aws_name( role_name )
+ try:
+ self.iam.create_role( aws_role_name, assume_role_policy_document=json.dumps( {
+ "Version": "2012-10-17",
+ "Statement": [ {
+ "Effect": "Allow",
+ "Principal": { "Service": [ "ec2.amazonaws.com" ] },
+ "Action": [ "sts:AssumeRole" ] }
+ ] } ) )
+ except BotoServerError as e:
+ if e.status == 409 and e.error_code == 'EntityAlreadyExists':
+ pass
+ else:
+ raise
+
+ self.__setup_entity_policies( aws_role_name, policies,
+ list_policies=self.iam.list_role_policies,
+ delete_policy=self.iam.delete_role_policy,
+ get_policy=self.iam.get_role_policy,
+ put_policy=self.iam.put_role_policy )
+
+ return aws_role_name
+
+ def setup_iam_user_policies( self, user_name, policies ):
+ try:
+ self.iam.create_user( user_name )
+ except BotoServerError as e:
+ if e.status == 409 and e.error_code == 'EntityAlreadyExists':
+ pass
+ else:
+ raise
+ self.__setup_entity_policies( user_name, policies,
+ list_policies=self.iam.get_all_user_policies,
+ delete_policy=self.iam.delete_user_policy,
+ get_policy=self.iam.get_user_policy,
+ put_policy=self.iam.put_user_policy )
+
+ def __setup_entity_policies( self, entity_name, policies,
+ list_policies, delete_policy, get_policy, put_policy ):
+ # Delete superfluous policies
+ policy_names = set( list_policies( entity_name ).policy_names )
+ for policy_name in policy_names.difference( set( policies.keys( ) ) ):
+ delete_policy( entity_name, policy_name )
+
+ # Create expected policies
+ for policy_name, policy in policies.iteritems( ):
+ current_policy = None
+ try:
+ current_policy = json.loads( urllib.unquote(
+ get_policy( entity_name, policy_name ).policy_document ) )
+ except BotoServerError as e:
+ if e.status == 404 and e.error_code == 'NoSuchEntity':
+ pass
+ else:
+ raise
+ if current_policy != policy:
+ put_policy( entity_name, policy_name, json.dumps( policy ) )
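+
+ # A minimal usage sketch (the role name and policy are illustrative, not from the
+ # original source):
+ #
+ #   policies = dict( s3_read_only=dict(
+ #       Version='2012-10-17',
+ #       Statement=[ dict( Effect='Allow', Action=[ 's3:GetObject' ], Resource='*' ) ] ) )
+ #   aws_role_name = ctx.setup_iam_ec2_role( 'worker', policies )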
+
+ _agent_topic_name = "cgcloud-agent-notifications"
+
+ @property
+ def agent_queue_name( self ):
+ host_qualifier = socket.gethostname( ).replace( '.', '-' )
+ return self._agent_topic_name + '/' + host_qualifier
+
+ @property
+ @memoize
+ def agent_topic_arn( self ):
+ """
+ The ARN of the SNS topic on which the agents listen for messages.
+ """
+ # Note that CreateTopic is idempotent
+ return self.sns.create_topic( self._agent_topic_name )[
+ 'CreateTopicResponse' ][ 'CreateTopicResult' ][ 'TopicArn' ]
+
+ def publish_agent_message( self, message ):
+ """
+ :type message: Message
+ """
+ self.sns.publish( self.agent_topic_arn, message.to_sns( ) )
+
+ def __publish_key_update_agent_message( self ):
+ self.publish_agent_message( Message( type=Message.TYPE_UPDATE_SSH_KEYS ) )
+
+ def reset_namespace_security( self ):
+ """
+ Delete all
+
+ - IAM instance profiles,
+ - IAM roles,
+ - IAM policies and
+ - EC2 security groups
+
+ associated with this context, or rather the namespace this context represents.
+ """
+ self.delete_instance_profiles( self.local_instance_profiles( ) )
+ self.delete_roles( self.local_roles( ) )
+ self.delete_security_groups( self.local_security_groups( ) )
+
+ def local_instance_profiles( self ):
+ return [ p for p in self._get_all_instance_profiles( )
+ if self.try_contains_aws_name( p.instance_profile_name ) ]
+
+ def _get_all_instance_profiles( self ):
+ return self._pager( self.iam.list_instance_profiles, 'instance_profiles' )
+
+ def _pager( self, requestor_callable, result_attribute_name ):
+ marker = None
+ while True:
+ result = requestor_callable( marker=marker )
+ for p in getattr( result, result_attribute_name ):
+ yield p
+ if result.is_truncated == 'true':
+ marker = result.marker
+ else:
+ break
+
+ def delete_instance_profiles( self, instance_profiles ):
+ log.debug( 'Deleting profiles %r', instance_profiles )
+ for p in instance_profiles:
+ profile_name = p.instance_profile_name
+ with out_exception( 'instance profile', profile_name ):
+ # currently EC2 allows only one role per profile
+ if p.roles:
+ role_name = p.roles.member.role_name
+ log.debug( 'Removing role %s from profile %s', role_name, profile_name )
+ self.iam.remove_role_from_instance_profile( profile_name, role_name )
+ log.debug( 'Deleting profile %s', profile_name )
+ self.iam.delete_instance_profile( profile_name )
+
+ def local_roles( self ):
+ return [ r for r in self._get_all_roles( ) if self.try_contains_aws_name( r.role_name ) ]
+
+ def _get_all_roles( self ):
+ return self._pager( self.iam.list_roles, 'roles' )
+
+ def delete_roles( self, roles ):
+ log.debug( 'Deleting roles %r', roles )
+ for r in roles:
+ with out_exception( 'role', r.role_name ):
+ for policy_name in self.iam.list_role_policies( r.role_name ).policy_names:
+ self.iam.delete_role_policy( r.role_name, policy_name )
+ self.iam.delete_role( r.role_name )
+
+ def local_security_groups( self ):
+ return [ sg for sg in self.ec2.get_all_security_groups( )
+ if self.try_contains_aws_name( sg.name ) ]
+
+ def delete_security_groups( self, security_groups ):
+ log.debug( 'Deleting security groups %r', security_groups )
+ for sg in security_groups:
+ with out_exception( 'security group', sg.name ):
+ sg.delete( )
+
+ def unused_fingerprints( self ):
+ """
+ Find all unused fingerprints. This method works globally and does not consider the
+ namespace represented by this context.
+
+ :rtype: set[str]
+ """
+ keypairs = self.expand_keypair_globs( '*' )
+ ec2_fingerprints = set( keypair.fingerprint for keypair in keypairs )
+ bucket = self.s3.get_bucket( self.s3_bucket_name, validate=False )
+ prefix = self.ssh_pubkey_s3_key_prefix
+ s3_fingerprints = set( key.name[ len( prefix ): ] for key in bucket.list( prefix=prefix ) )
+ unused_fingerprints = s3_fingerprints - ec2_fingerprints
+ return unused_fingerprints
+
+ def delete_fingerprints( self, fingerprints ):
+ """
+ Delete the given fingerprints.
+
+ :type fingerprints: Iterable(str)
+ """
+ bucket = self.s3.get_bucket( self.s3_bucket_name, validate=False )
+ key_names = [ self.ssh_pubkey_s3_key_prefix + fingerprint for fingerprint in fingerprints ]
+ bucket.delete_keys( key_names )
+
+ def unused_snapshots( self ):
+ """
+ Find all snapshots created for AMIs owned by the current AWS account for which the AMI
+ has since been unregistered. This method works globally and does not consider the
+ namespace represented by this context.
+
+ :rtype: set[str]
+ """
+ all_snapshots = self.ec2.get_all_snapshots(
+ owner='self',
+ filters=dict( description='Created by CreateImage*' ) )
+ all_snapshots = set( snapshot.id for snapshot in all_snapshots )
+ used_snapshots = set( bdt.snapshot_id
+ for image in self.ec2.get_all_images( owners=[ 'self' ] )
+ for bdt in image.block_device_mapping.itervalues( )
+ if bdt.snapshot_id is not None )
+ return all_snapshots - used_snapshots
+
+ def delete_snapshots( self, unused_snapshots ):
+ """
+ Delete the snapshots with the given IDs.
+
+ :type unused_snapshots: collections.Iterable[str]
+ """
+ for snapshot_id in unused_snapshots:
+ log.info( 'Deleting snapshot %s', snapshot_id )
+ self.ec2.delete_snapshot( snapshot_id )
+
+
+@contextmanager
+def out_exception( object_type, object_name ):
+ try:
+ yield
+ except:
+ log.warn( "Failed to remove %s '%s'", object_type, object_name, exc_info=True )
diff --git a/lib/src/cgcloud/lib/ec2.py b/lib/src/cgcloud/lib/ec2.py
new file mode 100644
index 0000000..8ce7af2
--- /dev/null
+++ b/lib/src/cgcloud/lib/ec2.py
@@ -0,0 +1,428 @@
+import errno
+import logging
+import time
+from collections import Iterator
+from operator import attrgetter
+
+from bd2k.util.exceptions import panic
+from bd2k.util.retry import retry
+from boto.ec2.ec2object import TaggedEC2Object
+from boto.ec2.instance import Instance
+from boto.ec2.spotinstancerequest import SpotInstanceRequest
+from boto.exception import EC2ResponseError, BotoServerError
+
+from cgcloud.lib.util import UserError
+
+a_short_time = 5
+
+a_long_time = 60 * 60
+
+log = logging.getLogger( __name__ )
+
+
+def not_found( e ):
+ return e.error_code.endswith( '.NotFound' )
+
+
+def retry_ec2( retry_after=a_short_time, retry_for=10 * a_short_time, retry_while=not_found ):
+ t = retry_after
+ return retry( delays=(t, t, t * 2, t * 4), timeout=retry_for, predicate=retry_while )
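+
+# Intended usage pattern (a sketch, not from the original source): the body of the 'with'
+# block is retried on EC2 'NotFound' errors until the timeout expires, e.g.
+#
+#   for attempt in retry_ec2( ):
+#       with attempt:
+#           ec2.create_tags( [ instance_id ], { 'Name': 'example' } )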
+
+
+class EC2VolumeHelper( object ):
+ """
+ A helper for creating, looking up and attaching an EBS volume in EC2
+ """
+
+ def __init__( self, ec2, name, size, availability_zone, volume_type="standard" ):
+ """
+ :param ec2: the Boto EC2 connection object
+ :type ec2: boto.ec2.connection.EC2Connection
+ """
+ super( EC2VolumeHelper, self ).__init__( )
+ self.availability_zone = availability_zone
+ self.ec2 = ec2
+ self.name = name
+ self.volume_type = volume_type
+ volume = self.__lookup( )
+ if volume is None:
+ log.info( "Creating volume %s, ...", self.name )
+ volume = self.ec2.create_volume( size, availability_zone, volume_type=self.volume_type )
+ self.__wait_transition( volume, { 'creating' }, 'available' )
+ volume.add_tag( 'Name', self.name )
+ log.info( '... created %s.', volume.id )
+ volume = self.__lookup( )
+ self.volume = volume
+
+ def attach( self, instance_id, device ):
+ if self.volume.attach_data.instance_id == instance_id:
+ log.info( "Volume '%s' already attached to instance '%s'." %
+ (self.volume.id, instance_id) )
+ else:
+ self.__assert_attachable( )
+ self.ec2.attach_volume( volume_id=self.volume.id,
+ instance_id=instance_id,
+ device=device )
+ self.__wait_transition( self.volume, { 'available' }, 'in-use' )
+ if self.volume.attach_data.instance_id != instance_id:
+ raise UserError( "Volume %s is not attached to this instance." )
+
+ def __lookup( self ):
+ """
+ Ensure that an EBS volume of the given name is available in the current availability zone.
+ If the EBS volume exists but has been placed into a different zone, or if it is not
+ available, an exception will be thrown.
+
+ :rtype: boto.ec2.volume.Volume
+ """
+ volumes = self.ec2.get_all_volumes( filters={ 'tag:Name': self.name } )
+ if len( volumes ) < 1:
+ return None
+ if len( volumes ) > 1:
+ raise UserError( "More than one EBS volume named %s" % self.name )
+ return volumes[ 0 ]
+
+ @staticmethod
+ def __wait_transition( volume, from_states, to_state ):
+ wait_transition( volume, from_states, to_state, attrgetter( 'status' ) )
+
+ def __assert_attachable( self ):
+ if self.volume.status != 'available':
+ raise UserError( "EBS volume %s is not available." % self.name )
+ expected_zone = self.availability_zone
+ if self.volume.zone != expected_zone:
+ raise UserError( "Availability zone of EBS volume %s is %s but should be %s."
+ % (self.name, self.volume.zone, expected_zone) )
+
+
+class UnexpectedResourceState( Exception ):
+ def __init__( self, resource, to_state, state ):
+ super( UnexpectedResourceState, self ).__init__(
+ "Expected state of %s to be '%s' but got '%s'" %
+ (resource, to_state, state) )
+
+
+def wait_transition( resource, from_states, to_state, state_getter=attrgetter( 'state' ) ):
+ """
+ Wait until the specified EC2 resource (instance, image, volume, ...) transitions from any
+ of the given 'from' states to the specified 'to' state. If the resource is found in a state
+ other than the 'to' state or any of the 'from' states, an exception will be thrown.
+
+ :param resource: the resource to monitor
+ :param from_states:
+ a set of states that the resource is expected to be in before the transition occurs
+ :param to_state: the state of the resource when this method returns
+ """
+ state = state_getter( resource )
+ while state in from_states:
+ time.sleep( a_short_time )
+ for attempt in retry_ec2( ):
+ with attempt:
+ resource.update( validate=True )
+ state = state_getter( resource )
+ if state != to_state:
+ raise UnexpectedResourceState( resource, to_state, state )
+
+
+def running_on_ec2( ):
+ try:
+ with open( '/sys/hypervisor/uuid' ) as f:
+ return f.read( 3 ) == 'ec2'
+ except IOError as e:
+ if e.errno == errno.ENOENT:
+ return False
+ else:
+ raise
+
+
+from collections import namedtuple
+
+InstanceType = namedtuple( 'InstanceType', [
+ 'name', # the API name of the instance type
+ 'cores', # the number of cores
+ 'ecu', # the computational power of the core times the number of cores
+ 'memory', # RAM in GB
+ 'virtualization_types', # the supported virtualization types, in order of preference
+ 'disks', # the number of ephemeral (aka 'instance store') volumes
+ 'disk_type', # the type of ephemeral volume
+ 'disk_capacity', # the capacity of each ephemeral volume in GB
+ 'spot_availability' # can this instance type be used on the spot market?
+] )
+
+hvm = 'hvm' # hardware virtualization
+pv = 'paravirtual' # para-virtualization
+ssd = 'SSD' # solid-state disk
+hdd = 'HDD' # spinning disk
+variable_ecu = -1 # variable ecu
+
+_ec2_instance_types = [
+ # current generation instance types
+ InstanceType( 't2.micro', 1, variable_ecu, 1, [ hvm ], 0, None, 0, False ),
+ InstanceType( 't2.small', 1, variable_ecu, 2, [ hvm ], 0, None, 0, False ),
+ InstanceType( 't2.medium', 2, variable_ecu, 4, [ hvm ], 0, None, 0, False ),
+ InstanceType( 't2.large', 2, variable_ecu, 8, [ hvm ], 0, None, 0, False ),
+
+ InstanceType( 'm3.medium', 1, 3, 3.75, [ hvm, pv ], 1, ssd, 4, True ),
+ InstanceType( 'm3.large', 2, 6.5, 7.5, [ hvm, pv ], 1, ssd, 32, True ),
+ InstanceType( 'm3.xlarge', 4, 13, 15, [ hvm, pv ], 2, ssd, 40, True ),
+ InstanceType( 'm3.2xlarge', 8, 26, 30, [ hvm, pv ], 2, ssd, 80, True ),
+
+ InstanceType( 'm4.large', 2, 6.5, 8, [ hvm ], 0, None, 0, True ),
+ InstanceType( 'm4.xlarge', 4, 13, 16, [ hvm ], 0, None, 0, True ),
+ InstanceType( 'm4.2xlarge', 8, 26, 32, [ hvm ], 0, None, 0, True ),
+ InstanceType( 'm4.4xlarge', 16, 53.5, 64, [ hvm ], 0, None, 0, True ),
+ InstanceType( 'm4.10xlarge', 40, 124.5, 160, [ hvm ], 0, None, 0, True ),
+
+ InstanceType( 'c4.large', 2, 8, 3.75, [ hvm ], 0, None, 0, True ),
+ InstanceType( 'c4.xlarge', 4, 16, 7.5, [ hvm ], 0, None, 0, True ),
+ InstanceType( 'c4.2xlarge', 8, 31, 15, [ hvm ], 0, None, 0, True ),
+ InstanceType( 'c4.4xlarge', 16, 62, 30, [ hvm ], 0, None, 0, True ),
+ InstanceType( 'c4.8xlarge', 36, 132, 60, [ hvm ], 0, None, 0, True ),
+
+ InstanceType( 'c3.large', 2, 7, 3.75, [ hvm, pv ], 2, ssd, 16, True ),
+ InstanceType( 'c3.xlarge', 4, 14, 7.5, [ hvm, pv ], 2, ssd, 40, True ),
+ InstanceType( 'c3.2xlarge', 8, 28, 15, [ hvm, pv ], 2, ssd, 80, True ),
+ InstanceType( 'c3.4xlarge', 16, 55, 30, [ hvm, pv ], 2, ssd, 160, True ),
+ InstanceType( 'c3.8xlarge', 32, 108, 60, [ hvm, pv ], 2, ssd, 320, True ),
+
+ InstanceType( 'g2.2xlarge', 8, 26, 15, [ hvm ], 1, ssd, 60, True ),
+
+ InstanceType( 'r3.large', 2, 6.5, 15, [ hvm ], 1, ssd, 32, True ),
+ InstanceType( 'r3.xlarge', 4, 13, 30.5, [ hvm ], 1, ssd, 80, True ),
+ InstanceType( 'r3.2xlarge', 8, 26, 61, [ hvm ], 1, ssd, 160, True ),
+ InstanceType( 'r3.4xlarge', 16, 52, 122, [ hvm ], 1, ssd, 320, True ),
+ InstanceType( 'r3.8xlarge', 32, 104, 244, [ hvm ], 2, ssd, 320, True ),
+
+ InstanceType( 'i2.xlarge', 4, 14, 30.5, [ hvm ], 1, ssd, 800, False ),
+ InstanceType( 'i2.2xlarge', 8, 27, 61, [ hvm ], 2, ssd, 800, False ),
+ InstanceType( 'i2.4xlarge', 16, 53, 122, [ hvm ], 4, ssd, 800, False ),
+ InstanceType( 'i2.8xlarge', 32, 104, 244, [ hvm ], 8, ssd, 800, False ),
+
+ InstanceType( 'd2.xlarge', 4, 14, 30.5, [ hvm ], 3, hdd, 2000, True ),
+ InstanceType( 'd2.2xlarge', 8, 28, 61, [ hvm ], 6, hdd, 2000, True ),
+ InstanceType( 'd2.4xlarge', 16, 56, 122, [ hvm ], 12, hdd, 2000, True ),
+ InstanceType( 'd2.8xlarge', 36, 116, 244, [ hvm ], 24, hdd, 2000, True ),
+
+ # previous generation instance types
+ InstanceType( 'm1.small', 1, 1, 1.7, [ pv ], 1, hdd, 160, True ),
+ InstanceType( 'm1.medium', 1, 2, 3.75, [ pv ], 1, hdd, 410, True ),
+ InstanceType( 'm1.large', 2, 4, 7.5, [ pv ], 2, hdd, 420, True ),
+ InstanceType( 'm1.xlarge', 4, 8, 15, [ pv ], 4, hdd, 420, True ),
+
+ InstanceType( 'c1.medium', 2, 5, 1.7, [ pv ], 1, hdd, 350, True ),
+ InstanceType( 'c1.xlarge', 8, 20, 7, [ pv ], 4, hdd, 420, True ),
+
+ InstanceType( 'cc2.8xlarge', 32, 88, 60.5, [ hvm ], 4, hdd, 840, True ),
+
+ InstanceType( 'm2.xlarge', 2, 6.5, 17.1, [ pv ], 1, hdd, 420, True ),
+ InstanceType( 'm2.2xlarge', 4, 13, 34.2, [ pv ], 1, hdd, 850, True ),
+ InstanceType( 'm2.4xlarge', 8, 26, 68.4, [ pv ], 2, hdd, 840, True ),
+
+ InstanceType( 'cr1.8xlarge', 32, 88, 244, [ hvm ], 2, ssd, 120, True ),
+
+ InstanceType( 'hi1.4xlarge', 16, 35, 60.5, [ hvm, pv ], 2, ssd, 1024, True ),
+ InstanceType( 'hs1.8xlarge', 16, 35, 117, [ hvm, pv ], 24, hdd, 2048, False ),
+
+ InstanceType( 't1.micro', 1, variable_ecu, 0.615, [ pv ], 0, None, 0, True ) ]
+
+ec2_instance_types = dict( (_.name, _) for _ in _ec2_instance_types )
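+
+# For example, ec2_instance_types[ 'm3.large' ].memory is 7.5 and
+# ec2_instance_types[ 'm3.large' ].virtualization_types is [ 'hvm', 'paravirtual' ].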
+
+
+def wait_instances_running( ec2, instances ):
+ """
+ Wait until no instance in the given iterable is 'pending'. Yield each instance as soon
+ as it leaves the 'pending' state.
+
+ :param boto.ec2.connection.EC2Connection ec2: the EC2 connection to use for making requests
+ :param Iterator[Instance] instances: the instances to wait on
+ :rtype: Iterator[Instance]
+ """
+ running_ids = set( )
+ other_ids = set( )
+ while True:
+ pending_ids = set( )
+ for i in instances:
+ if i.state == 'pending':
+ pending_ids.add( i.id )
+ elif i.state == 'running':
+ assert i.id not in running_ids
+ running_ids.add( i.id )
+ yield i
+ else:
+ assert i.id not in other_ids
+ other_ids.add( i.id )
+ yield i
+ log.info( '%i instance(s) pending, %i running, %i other.',
+ *map( len, (pending_ids, running_ids, other_ids) ) )
+ if not pending_ids:
+ break
+ seconds = max( a_short_time, min( len( pending_ids ), 10 * a_short_time ) )
+ log.info( 'Sleeping for %is', seconds )
+ time.sleep( seconds )
+ for attempt in retry_ec2( ):
+ with attempt:
+ instances = ec2.get_only_instances( list( pending_ids ) )
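+
+# A minimal sketch of the intended call pattern (not from the original source), assuming
+# 'ec2' is a connected EC2Connection and 'reservation' came from a prior run_instances call:
+#
+#   for instance in wait_instances_running( ec2, reservation.instances ):
+#       log.info( 'Instance %s is now %s.', instance.id, instance.state )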
+
+
+def wait_spot_requests_active( ec2, requests, timeout=None, tentative=False ):
+ """
+ Wait until no spot request in the given iterator is in the 'open' state or, optionally,
+ a timeout occurs. Yield spot requests as soon as they leave the 'open' state.
+
+ :param Iterator[SpotInstanceRequest] requests:
+
+ :param float timeout: Maximum time in seconds to spend waiting or None to wait forever. If a
+ timeout occurs, the remaining open requests will be cancelled.
+
+ :param bool tentative: if True, give up on a spot request at the earliest indication of it
+ not being fulfilled immediately
+
+ :rtype: Iterator[list[SpotInstanceRequest]]
+ """
+
+ if timeout is not None:
+ timeout = time.time( ) + timeout
+ active_ids = set( )
+ other_ids = set( )
+ open_ids = None
+
+ def cancel( ):
+ log.warn( 'Cancelling remaining %i spot requests.', len( open_ids ) )
+ ec2.cancel_spot_instance_requests( list( open_ids ) )
+
+ def spot_request_not_found( e ):
+ error_code = 'InvalidSpotInstanceRequestID.NotFound'
+ return isinstance( e, EC2ResponseError ) and e.error_code == error_code
+
+ try:
+ while True:
+ open_ids, eval_ids, fulfill_ids = set( ), set( ), set( )
+ batch = [ ]
+ for r in requests:
+ if r.state == 'open':
+ open_ids.add( r.id )
+ if r.status.code == 'pending-evaluation':
+ eval_ids.add( r.id )
+ elif r.status.code == 'pending-fulfillment':
+ fulfill_ids.add( r.id )
+ else:
+ log.info( 'Request %s entered status %s indicating that it will not be '
+ 'fulfilled anytime soon.', r.id, r.status.code )
+ elif r.state == 'active':
+ assert r.id not in active_ids
+ active_ids.add( r.id )
+ batch.append( r )
+ else:
+ assert r.id not in other_ids
+ other_ids.add( r.id )
+ batch.append( r )
+ if batch:
+ yield batch
+ log.info( '%i spot request(s) are open (%i of which are pending evaluation and %i '
+ 'are pending fulfillment), %i are active and %i are in another state.',
+ *map( len, (open_ids, eval_ids, fulfill_ids, active_ids, other_ids) ) )
+ if not open_ids or tentative and not eval_ids and not fulfill_ids:
+ break
+ sleep_time = 2 * a_short_time
+ if timeout is not None and time.time( ) + sleep_time >= timeout:
+ log.warn( 'Timed out waiting for spot requests.' )
+ break
+ log.info( 'Sleeping for %is', sleep_time )
+ time.sleep( sleep_time )
+ for attempt in retry_ec2( retry_while=spot_request_not_found ):
+ with attempt:
+ requests = ec2.get_all_spot_instance_requests( list( open_ids ) )
+ except:
+ if open_ids:
+ with panic( log ):
+ cancel( )
+ raise
+ else:
+ if open_ids:
+ cancel( )
+
+
+def create_spot_instances( ec2, price, image_id, spec,
+ num_instances=1, timeout=None, tentative=False, tags=None ):
+ """
+ :rtype: Iterator[list[Instance]]
+ """
+
+ def spotRequestNotFound( e ):
+ return e.error_code == "InvalidSpotInstanceRequestID.NotFound"
+
+ for attempt in retry_ec2( retry_for=a_long_time,
+ retry_while=inconsistencies_detected ):
+ with attempt:
+ requests = ec2.request_spot_instances( price, image_id, count=num_instances, **spec )
+
+ if tags is not None:
+ for requestID in (request.id for request in requests):
+ for attempt in retry_ec2( retry_while=spotRequestNotFound ):
+ with attempt:
+ ec2.create_tags( [ requestID ], tags )
+
+ num_active, num_other = 0, 0
+ # noinspection PyUnboundLocalVariable,PyTypeChecker
+ # request_spot_instances's type annotation is wrong
+ for batch in wait_spot_requests_active( ec2,
+ requests,
+ timeout=timeout,
+ tentative=tentative ):
+ instance_ids = [ ]
+ for request in batch:
+ if request.state == 'active':
+ instance_ids.append( request.instance_id )
+ num_active += 1
+ else:
+ log.info( 'Request %s in unexpected state %s.', request.id, request.state )
+ num_other += 1
+ if instance_ids:
+ # This next line is the reason we batch. It's so we can get multiple instances in
+ # a single request.
+ yield ec2.get_only_instances( instance_ids )
+ if not num_active:
+ message = 'None of the spot requests entered the active state'
+ if tentative:
+ log.warn( message + '.' )
+ else:
+ raise RuntimeError( message )
+ if num_other:
+ log.warn( '%i request(s) entered a state other than active.', num_other )
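+
+# A minimal usage sketch (not from the original source); the price, AMI and spec values
+# are purely illustrative:
+#
+#   spec = dict( instance_type='m3.large', key_name='jdoe-laptop' )
+#   for instances in create_spot_instances( ec2, price=0.05, image_id='ami-12345678',
+#                                           spec=spec, num_instances=2, timeout=300 ):
+#       for instance in instances:
+#           log.info( 'Spot instance %s is up.', instance.id )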
+
+
+def inconsistencies_detected( e ):
+ if not isinstance( e, BotoServerError ): return False
+ if e.code == 'InvalidGroup.NotFound': return True
+ m = e.error_message.lower( )
+ return 'invalid iam instance profile' in m or 'no associated iam roles' in m
+
+
+def create_ondemand_instances( ec2, image_id, spec, num_instances=1 ):
+ """
+ Invokes the RunInstances EC2 API call, accounting for the race between recently created
+ instance profiles or IAM roles and an instance creation that refers to them.
+
+ :rtype: list[Instance]
+ """
+ instance_type = spec[ 'instance_type' ]
+ log.info( 'Creating %s instance(s) ... ', instance_type )
+ for attempt in retry_ec2( retry_for=a_long_time,
+ retry_while=inconsistencies_detected ):
+ with attempt:
+ return ec2.run_instances( image_id,
+ min_count=num_instances,
+ max_count=num_instances,
+ **spec ).instances
+
+
+def tag_object_persistently( tagged_ec2_object, tags_dict ):
+ """
+ Object tagging occasionally fails with "NotFound" types of errors so we need to
+ retry a few times. Sigh ...
+
+ :type tagged_ec2_object: TaggedEC2Object
+ """
+ for attempt in retry_ec2( ):
+ with attempt:
+ tagged_ec2_object.add_tags( tags_dict )
diff --git a/lib/src/cgcloud/lib/message.py b/lib/src/cgcloud/lib/message.py
new file mode 100644
index 0000000..aa78745
--- /dev/null
+++ b/lib/src/cgcloud/lib/message.py
@@ -0,0 +1,54 @@
+import base64
+import json
+
+
+class UnknownVersion( Exception ):
+ def __init__( self, version ):
+ super( UnknownVersion, self ).__init__( "Unknown message version %d" % version )
+ self.version = version
+
+
+class Message( object ):
+ """
+ A message, mostly for passing information about events to agents. The message version is used
+ to differentiate between incompatible message formats. For example, adding a field is a
+ compatible change if there is a default value for that field, and does not require
+ incrementing the version. Message consumers should ignore versions they don't understand.
+ """
+
+ TYPE_UPDATE_SSH_KEYS = 1
+
+ @classmethod
+ def from_sqs( cls, sqs_message ):
+ """
+ :param sqs_message: the SQS message to initialize this instance from, assuming that the
+ SQS message originates from an SQS queue that is subscribed to an SNS topic
+ :type sqs_message: SQSMessage
+
+ :return: the parsed message or None if the message is of an unknown version
+ :rtype: Message
+ """
+ sns_message = json.loads( sqs_message.get_body( ) )
+ return Message.from_sns( sns_message[ 'Message' ] )
+
+ @classmethod
+ def from_sns( cls, message ):
+ return cls.from_dict( json.loads( base64.standard_b64decode( message ) ) )
+
+ @classmethod
+ def from_dict( cls, message ):
+ version = message[ 'version' ]
+ if version == 1:
+ return cls( type=message[ 'type' ] )
+ else:
+ raise UnknownVersion( version )
+
+ def __init__( self, type ):
+ super( Message, self ).__init__( )
+ self.type = type
+
+ def to_dict( self ):
+ return dict( version=1, type=self.type )
+
+ def to_sns( self ):
+ return base64.standard_b64encode( json.dumps( self.to_dict( ) ) )
\ No newline at end of file
diff --git a/lib/src/cgcloud/lib/test/__init__.py b/lib/src/cgcloud/lib/test/__init__.py
new file mode 100644
index 0000000..7bd05fb
--- /dev/null
+++ b/lib/src/cgcloud/lib/test/__init__.py
@@ -0,0 +1,47 @@
+import os
+import time
+from struct import pack
+from unittest import TestCase
+
+from boto.utils import get_instance_metadata
+
+from cgcloud.lib import aws_d32, test_namespace_suffix_length
+from cgcloud.lib.context import Context
+from cgcloud.lib.ec2 import running_on_ec2
+
+
+class CgcloudTestCase( TestCase ):
+ """
+ A base class for CGCloud test cases. When run with CGCLOUD_NAMESPACE unset, a new test
+ namespace will be prepared during setup and cleaned up during teardown. Otherwise,
+ the configured namespace will be used but not cleaned up.
+ """
+ __namespace = None
+ cleanup = True
+ ctx = None
+
+ @classmethod
+ def setUpClass( cls ):
+ super( CgcloudTestCase, cls ).setUpClass( )
+ if running_on_ec2( ):
+ os.environ.setdefault( 'CGCLOUD_ZONE',
+ get_instance_metadata( )[ 'placement' ][ 'availability-zone' ] )
+ # Using the d32 of a binary string that starts with a 4-byte, big-endian time stamp
+ # yields compact names whose lexicographical sorting is consistent with the historical
+ # order. We add the process ID so we can run tests concurrently in child processes using
+ # the pytest-xdist plugin.
+ suffix = aws_d32.encode( pack( '>II', int( time.time( ) ), os.getpid( ) ) )
+ assert len( suffix ) == test_namespace_suffix_length
+ cls.__namespace = '/test/%s/' % suffix
+ os.environ.setdefault( 'CGCLOUD_NAMESPACE', cls.__namespace )
+ cls.ctx = Context( availability_zone=os.environ[ 'CGCLOUD_ZONE' ],
+ namespace=os.environ[ 'CGCLOUD_NAMESPACE' ] )
+
+ @classmethod
+ def tearDownClass( cls ):
+ # Only cleanup if the context is using the default test namespace. If another namespace
+ # is configured, we can't assume that all resources were created by the test and that
+ # they can therefore be removed.
+ if cls.cleanup and cls.ctx.namespace == cls.__namespace:
+ cls.ctx.reset_namespace_security( )
+ super( CgcloudTestCase, cls ).tearDownClass( )
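+
+# A minimal sketch of a concrete test case built on this base (illustrative, not part of
+# the original source):
+#
+#   class ContextSmokeTest( CgcloudTestCase ):
+#       def test_namespace( self ):
+#           # with CGCLOUD_NAMESPACE unset, setUpClass creates a '/test/...' namespace
+#           self.assertTrue( self.ctx.namespace.startswith( '/test/' ) )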
diff --git a/lib/src/cgcloud/lib/util.py b/lib/src/cgcloud/lib/util.py
new file mode 100644
index 0000000..2c1f568
--- /dev/null
+++ b/lib/src/cgcloud/lib/util.py
@@ -0,0 +1,887 @@
+import argparse
+import base64
+import hashlib
+import logging
+import multiprocessing
+import multiprocessing.pool
+import os
+import re
+import struct
+import subprocess
+import sys
+from StringIO import StringIO
+from abc import ABCMeta, abstractmethod
+from collections import Sequence
+from itertools import islice, count
+from math import sqrt
+from textwrap import dedent
+
+from bd2k.util.iterables import concat
+from bd2k.util.strings import interpolate
+
+log = logging.getLogger( __name__ )
+
+try:
+ from cgcloud.crypto.PublicKey import RSA
+except ImportError:
+ from cgcloud_Crypto.PublicKey import RSA
+
+cores = multiprocessing.cpu_count( )
+
+
+def unpack_singleton( singleton ):
+ """
+ Expects a iterable with exactly one element and returns that element. If the iterable is
+ empty or yields more than one element an exception will be thrown.
+
+ >>> unpack_singleton([0])
+ 0
+
+ >>> unpack_singleton([])
+ Traceback (most recent call last):
+ ....
+ RuntimeError: Expected singleton, got empty iterable
+
+ >>> unpack_singleton([0,1])
+ Traceback (most recent call last):
+ ....
+ RuntimeError: Expected singleton, got iterable with more than one element
+ """
+ it = iter( singleton )
+ try:
+ result = it.next( )
+ except StopIteration:
+ raise RuntimeError( "Expected singleton, got empty iterable" )
+ try:
+ it.next( )
+ raise RuntimeError( "Expected singleton, got iterable with more than one element" )
+ except StopIteration:
+ return result
+
+
+def mean( xs ):
+ """
+ Return the mean value of a sequence of values.
+
+ >>> mean([2,4,4,4,5,5,7,9])
+ 5.0
+ >>> mean([9,10,11,7,13])
+ 10.0
+ >>> mean([1,1,10,19,19])
+ 10.0
+ >>> mean([10,10,10,10,10])
+ 10.0
+ >>> mean([1,"b"])
+ Traceback (most recent call last):
+ ...
+ ValueError: Input can't have non-numeric elements
+ >>> mean([])
+ Traceback (most recent call last):
+ ...
+ ValueError: Input can't be empty
+ """
+ try:
+ return sum( xs ) / float( len( xs ) )
+ except TypeError:
+ raise ValueError( "Input can't have non-numeric elements" )
+ except ZeroDivisionError:
+ raise ValueError( "Input can't be empty" )
+
+
+def std_dev( xs ):
+ """
+ Returns the standard deviation of the given iterable of numbers.
+
+ From http://rosettacode.org/wiki/Standard_deviation#Python
+
+ An empty list, or a list with non-numeric elements, will raise a ValueError.
+
+ >>> std_dev([2,4,4,4,5,5,7,9])
+ 2.0
+
+ >>> std_dev([9,10,11,7,13])
+ 2.0
+
+ >>> std_dev([1,1,10,19,19])
+ 8.049844718999243
+
+ >>> std_dev({1,1,10,19,19}) == std_dev({19,10,1})
+ True
+
+ >>> std_dev([10,10,10,10,10])
+ 0.0
+
+ >>> std_dev([1,"b"])
+ Traceback (most recent call last):
+ ...
+ ValueError: Input can't have non-numeric elements
+
+ >>> std_dev([])
+ Traceback (most recent call last):
+ ...
+ ValueError: Input can't be empty
+ """
+ m = mean( xs ) # this checks our pre-conditions, too
+ return sqrt( sum( (x - m) ** 2 for x in xs ) / float( len( xs ) ) )
+
+
+def camel_to_snake( s, separator='_' ):
+ """
+ Converts camel to snake case
+
+ >>> camel_to_snake('CamelCase')
+ 'camel_case'
+
+ >>> camel_to_snake('Camel_Case')
+ 'camel_case'
+
+ >>> camel_to_snake('camelCase')
+ 'camel_case'
+
+ >>> camel_to_snake('USA')
+ 'usa'
+
+ >>> camel_to_snake('TeamUSA')
+ 'team_usa'
+
+ >>> camel_to_snake('Team_USA')
+ 'team_usa'
+
+ >>> camel_to_snake('R2D2')
+ 'r2_d2'
+
+ >>> camel_to_snake('ToilPre310Box',separator='-')
+ 'toil-pre-310-box'
+
+ >>> camel_to_snake('Toil310Box',separator='-')
+ 'toil-310-box'
+ """
+ s = re.sub( '([a-z0-9])([A-Z])', r'\1%s\2' % separator, s )
+ s = re.sub( '([a-z])([A-Z0-9])', r'\1%s\2' % separator, s )
+ return s.lower( )
+
+
+def snake_to_camel( s, separator='_' ):
+ """
+ Converts snake to camel case
+
+ >>> snake_to_camel('')
+ ''
+
+ >>> snake_to_camel('_x____yz')
+ 'XYz'
+
+ >>> snake_to_camel('camel_case')
+ 'CamelCase'
+
+ >>> snake_to_camel('r2_d2')
+ 'R2D2'
+
+ >>> snake_to_camel('m1.small', '.')
+ 'M1Small'
+ """
+ return ''.join( [ w.capitalize( ) for w in s.split( separator ) ] )
+
+
+def abreviated_snake_case_class_name( cls, root_cls=object ):
+ """
+ Returns the snake-case (with '-' instead of '_') version of the name of a given class with
+ the name of another class removed from the end.
+
+ :param cls: the class whose name to abbreviate
+
+ :param root_cls: an ancestor of cls, whose name will be removed from the end of the name of cls
+
+ :return: cls.__name__ with root_cls.__name__ removed, converted to snake case with - as the
+ separator
+
+ >>> class Dog: pass
+ >>> abreviated_snake_case_class_name(Dog)
+ 'dog'
+ >>> class Dog: pass
+ >>> abreviated_snake_case_class_name(Dog,Dog)
+ ''
+ >>> class BarkingDog(Dog): pass
+ >>> abreviated_snake_case_class_name(BarkingDog,Dog)
+ 'barking'
+ >>> class SleepingGrowlingDog(Dog): pass
+ >>> abreviated_snake_case_class_name(SleepingGrowlingDog,Dog)
+ 'sleeping-growling'
+ >>> class Lumpi(SleepingGrowlingDog): pass
+ >>> abreviated_snake_case_class_name(Lumpi,Dog)
+ 'lumpi'
+ """
+ name = cls.__name__
+ suffix = root_cls.__name__
+ if name.endswith( suffix ): name = name[ :-len( suffix ) ]
+ return camel_to_snake( name, separator='-' )
+
+
+class UserError( RuntimeError ):
+ def __init__( self, message=None, cause=None ):
+ if (message is None) == (cause is None):
+ raise RuntimeError( "Must pass either message or cause." )
+ super( UserError, self ).__init__( message if cause is None else cause.message )
+
+
+def app_name( ):
+ return os.path.splitext( os.path.basename( sys.argv[ 0 ] ) )[ 0 ]
+
+
+class Application( object ):
+ """
+ An attempt at modularizing command line parsing (argparse). This is an experiment. The
+ general idea is to expose an application's functionality on the command line as separate
+ subcommands. Each subcommand is represented by a separate class, each of which gets its own
+ subparser (an argparse concept). This collects both the subcommand's functionality and the
+ code that sets up the command line interface to that functionality under the umbrella of a
+ single class.
+
+ >>> class FooCommand( Command ):
+ ... def __init__(self, app):
+ ... super( FooCommand, self ).__init__( app, help='Do some voodoo' )
+ ... self.option( '--verbose', action='store_true' )
+ ...
+ ... def run(self, options):
+ ... print 'Voodoo Magic' if options.verbose else 'Juju'
+
+ >>> app = Application()
+ >>> app.add( FooCommand )
+ >>> app.run( [ "foo", "--verbose" ] ) # foo is the command name
+ Voodoo Magic
+ >>> app.run( [ "foo" ] )
+ Juju
+ """
+
+ def __init__( self ):
+ """
+ Initializes the argument parser
+ """
+ super( Application, self ).__init__( )
+ self.args = None
+ self.parser = argparse.ArgumentParser( formatter_class=ArgParseHelpFormatter )
+ # noinspection PyProtectedMember
+ self.parser._positionals.title = 'Commands'
+ # noinspection PyProtectedMember
+ self.parser._optionals.title = 'Global options'
+ self.subparsers = self.parser.add_subparsers( help='Application commands',
+ dest='command_name' )
+ self.commands = { }
+
+ def option( self, *args, **kwargs ):
+ self._option( self.parser, args, kwargs )
+
+ @classmethod
+ def _option( cls, target, args, kwargs ):
+ try:
+ completer = kwargs.pop( 'completer' )
+ except KeyError:
+ completer = None
+ argument = target.add_argument( *args, **kwargs )
+ if completer is not None:
+ argument.completer = completer
+
+ def add( self, command_class ):
+ """
+ Instantiates a command of the specified class and adds it to this application.
+ """
+ command = command_class( self )
+ self.commands[ command.name( ) ] = command
+
+ def run( self, args=None ):
+ """
+ Parses the command line into an options object using argparse and invokes the requested
+ command's run() method with that options object.
+ """
+ # Pull in bash auto completion if available
+ try:
+ # noinspection PyUnresolvedReferences
+ import argcomplete
+ except ImportError:
+ pass
+ else:
+ argcomplete.autocomplete( self.parser )
+ self.args = args
+ options = self.parser.parse_args( args )
+ self.prepare( options )
+ command = self.commands[ options.command_name ]
+ command.run( options )
+
+ def prepare( self, options ):
+ pass
+
+
+class Command( object ):
+ """
+ An abstract base class for an application's commands.
+ """
+
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def run( self, options ):
+ """
+ Execute this command.
+
+ :param options: the parsed command line arguments
+ """
+ raise NotImplementedError( )
+
+ def __init__( self, application, **kwargs ):
+ """
+ Initializes this command.
+ :param application: The application this command belongs to.
+ :type application: Application
+ :param kwargs: optional arguments to the argparse's add_parser() method
+ """
+ super( Command, self ).__init__( )
+ self.application = application
+ doc = self.__class__.__doc__
+ help_ = doc.split( '\n\n', 1 )[ 0 ] if doc else None
+ if 'help' not in kwargs:
+ kwargs[ 'help' ] = help_
+ if 'description' not in kwargs:
+ kwargs[ 'description' ] = doc
+ self.parser = application.subparsers.add_parser(
+ self.name( ),
+ formatter_class=ArgParseHelpFormatter,
+ **kwargs )
+ # noinspection PyProtectedMember
+ self.parser._positionals.title = 'Command arguments'
+ # noinspection PyProtectedMember
+ self.parser._optionals.title = 'Command options'
+ self.group = None
+
+ def option( self, *args, **kwargs ):
+ target = self.parser if self.group is None else self.group
+ # noinspection PyProtectedMember
+ self.application._option( target, args, kwargs )
+
+ def name( self ):
+ """
+ Returns the name of this command as referred to by the user when invoking it via the
+ command line. The command name is the snake-case version (with dashes instead of
+ underscores) of this command's class name, minus its 'Command' suffix.
+
+ >>> class FooBarCommand(Command):
+ ... def run( self, options ):
+ ... pass
+ >>> app=Application()
+ >>> FooBarCommand(app).name()
+ 'foo-bar'
+ """
+ # noinspection PyTypeChecker
+ return abreviated_snake_case_class_name( type( self ), Command )
+
+ def begin_mutex( self, **kwargs ):
+ self.group = self.parser.add_mutually_exclusive_group( **kwargs )
+
+ def end_mutex( self ):
+ self.group = None
+
+
+class ArgParseHelpFormatter( argparse.ArgumentDefaultsHelpFormatter ):
+ # noinspection PyBroadException
+ try:
+ with open( os.devnull, 'a' ) as devnull:
+ rows, columns = map( int, subprocess.check_output( [ 'stty', 'size' ],
+ stderr=devnull ).split( ) )
+ except:
+ rows, columns = None, None
+
+ def __init__( self, *args, **kwargs ):
+ super( ArgParseHelpFormatter, self ).__init__( *args,
+ width=min( 100, self.columns ),
+ max_help_position=30,
+ **kwargs )
+
+
+empty_line_re = re.compile( r'^\s*(#.*)$' )
+
+
+def prepend_shell_script( script, in_file, out_file ):
+ """
+ Writes all lines from the specified input to the specified output. Input and output are both
+ assumed to be file-like objects. Reading from the input as well as writing to the output
+ starts at the current position in the respective file-like object. Unless the given script is
+ empty or None, and before writing the first script line from the input, the given script
+ will be written to the output, followed by a new line. A script line is a line that is not
+ empty. An empty line is a line that contains only whitespace, a comment or both.
+
+ >>> i,o = StringIO(''), StringIO()
+ >>> prepend_shell_script('hello',i,o)
+ >>> o.getvalue()
+ 'hello\\n'
+
+ >>> i,o = StringIO(''), StringIO()
+ >>> prepend_shell_script('',i,o)
+ >>> o.getvalue()
+ ''
+
+ >>> i,o = StringIO('\\n'), StringIO()
+ >>> prepend_shell_script('hello',i,o)
+ >>> o.getvalue()
+ 'hello\\n\\n'
+
+ >>> i,o = StringIO('#foo\\n'), StringIO()
+ >>> prepend_shell_script('hello',i,o)
+ >>> o.getvalue()
+ '#foo\\nhello\\n'
+
+ >>> i,o = StringIO(' # foo \\nbar\\n'), StringIO()
+ >>> prepend_shell_script('hello',i,o)
+ >>> o.getvalue()
+ ' # foo \\nhello\\nbar\\n'
+
+ >>> i,o = StringIO('bar\\n'), StringIO()
+ >>> prepend_shell_script('hello',i,o)
+ >>> o.getvalue()
+ 'hello\\nbar\\n'
+
+ >>> i,o = StringIO('#foo'), StringIO()
+ >>> prepend_shell_script('hello',i,o)
+ >>> o.getvalue()
+ '#foo\\nhello\\n'
+
+ >>> i,o = StringIO('#foo\\nbar # bla'), StringIO()
+ >>> prepend_shell_script('hello',i,o)
+ >>> o.getvalue()
+ '#foo\\nhello\\nbar # bla\\n'
+
+ >>> i,o = StringIO(' bar # foo'), StringIO()
+ >>> prepend_shell_script('hello',i,o)
+ >>> o.getvalue()
+ 'hello\\n bar # foo\\n'
+ """
+
+ def write_line( line ):
+ out_file.write( line )
+ if not line.endswith( '\n' ):
+ out_file.write( '\n' )
+
+ line = None
+ for line in in_file:
+ if not empty_line_re.match( line ): break
+ write_line( line )
+ line = None
+ if script: write_line( script )
+ if line: write_line( line )
+ for line in in_file:
+ write_line( line )
+
+
+def partition_seq( seq, size ):
+ """
+ Splits a sequence into an iterable of subsequences. All subsequences are of the given size,
+ except the last one, which may be smaller. If the input list is modified while the returned
+ list is processed, the behavior of the program is undefined.
+
+ :param seq: the list to split
+ :param size: the desired size of the sublists, must be > 0
+ :type size: int
+ :return: an iterable of sublists
+
+ >>> list(partition_seq("",1))
+ []
+ >>> list(partition_seq("abcde",2))
+ ['ab', 'cd', 'e']
+ >>> list(partition_seq("abcd",2))
+ ['ab', 'cd']
+ >>> list(partition_seq("abcde",1))
+ ['a', 'b', 'c', 'd', 'e']
+ >>> list(partition_seq("abcde",0))
+ Traceback (most recent call last):
+ ...
+ ValueError: Size must be greater than 0
+ >>> l=[1,2,3,4]
+ >>> i = iter( partition_seq(l,2) )
+ >>> l.pop(0)
+ 1
+ >>> i.next()
+ [2, 3]
+ """
+ if size < 1:
+ raise ValueError( 'Size must be greater than 0' )
+ return (seq[ pos:pos + size ] for pos in xrange( 0, len( seq ), size ))
+
+
+def ec2_keypair_fingerprint( ssh_key, reject_private_keys=False ):
+ """
+ Computes the fingerprint of a public or private OpenSSH key in the way Amazon does it for
+ keypairs resulting from either importing an SSH public key or generating a new keypair.
+
+ :param ssh_key: an RSA public key in OpenSSH format, or an RSA private key in PEM format
+
+ :return: The fingerprint of the key, in pairs of two hex digits with a colon between
+ pairs.
+
+ >>> ssh_pubkey = 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCvdDMvcwC1/5ByUhO1wh1sG6ficwgGHRab/p'\\
+ ... 'm6LN60rgxv+u2eJRao2esGB9Oyt863+HnjKj/NBdaiHTHcAHNq/TapbvEjgHaKgrVdfeMdQbJhWjJ97rql9Yn8k'\\
+ ... 'TNsXOeSyTW7rIKE0zeQkrwhsztmATumbQmJUMR7uuI31BxhQUfD/CoGZQrxFalWLDZcrcYY13ynplaNA/Hd/vP6'\\
+ ... 'qWO5WC0dTvzROEp7VwzJ7qeN2kP1JTh+kgVRoYd9mSm6x9UVjY6jQtZHa01Eg05sFraWgvNAvKhk9LS9Kiwhq8D'\\
+ ... 'xHdWdTamnGLtwXYQbn7RjG3UADAiTOWk+QSmU2igZvQ2F hannes at soe.ucsc.edu\\n'
+ >>> ec2_keypair_fingerprint(ssh_pubkey)
+ 'a5:5a:64:8a:1e:3f:4e:46:cd:1f:e9:b3:fc:cf:c5:19'
+
+ >>> # This is not a private key that is in use, in case you were wondering
+ >>> ssh_private_key = \\
+ ... '-----BEGIN RSA PRIVATE KEY-----\\n'+\\
+ ... 'MIIEpQIBAAKCAQEAi3shPK00+/6dwW8u+iDkUYiwIKl/lv0Ay5IstLszwb3CA4mVRlyq769HzE8f\\n'\\
+ ... 'cnzQUX/NI8y9MTO0UNt2JDMJWW5L49jmvxV0TjxQjKg8KcNzYuHsEny3k8LxezWMsmwlrrC89O6e\\n'\\
+ ... 'oo6boc8ForSdjVdIlJbvWu/82dThyFgTjWd5B+1O93xw8/ejqY9PfZExBeqpKjm58OUByTpVhvWe\\n'\\
+ ... 'jmbZ9BL60XJhwz9bDTrlKpjcGsMZ74G6XfQAhyyqXYeD/XOercCSJgQ/QjYKcPE9yMRyucHyuYZ8\\n'\\
+ ... 'HKzmG+u4p5ffnFb43tKzWCI330JQcklhGTldyqQHDWA41mT1QMoWfwIDAQABAoIBAF50gryRWykv\\n'\\
+ ... 'cuuUfI6ciaGBXCyyPBomuUwicC3v/Au+kk1M9Y7RoFxyKb/88QHZ7kTStDwDITfZmMmM5QN8oF80\\n'\\
+ ... 'pyXkM9bBE6MLi0zFfQCXQGN9NR4L4VGqGVfjmqUVQat8Omnv0fOpeVFpXZqij3Mw4ZDmaa7+iA+H\\n'\\
+ ... '72J56ru9i9wcBNqt//Kh5BXARekp7tHzklYrlqJd03ftDRp9GTBIFAsaPClTBpnPVhwD/rAoJEhb\\n'\\
+ ... 'KM9g/EMjQ28cUMQSHSwOyi9Rg/LtwFnER4u7pnBz2tbJFvLlXE96IQbksQL6/PTJ9H6Zpp+1fDcI\\n'\\
+ ... 'k/MKSQZtQOgfV8V1wlvHX+Q0bxECgYEA4LHj6o4usINnSy4cf6BRLrCA9//ePa8UjEK2YDC5rQRV\\n'\\
+ ... 'huFWqWJJSjWI9Ofjh8mZj8NvTJa9RW4d4Rn6F7upOuAer9obwfrmi4BEQSbvUwxQIuHOZ6itH/0L\\n'\\
+ ... 'klqQBuhJeyr3W+2IhudJUQz9MEoddOfYIybXqkF7XzDl2x6FcjcCgYEAnunySmjt+983gUKK9DgK\\n'\\
+ ... '/k1ki41jCAcFlGd8MbLEWkJpwt3FJFiyq6vVptoVH8MBnVAOjDneP6YyNBv5+zm3vyMuVJtKNcAP\\n'\\
+ ... 'MAxrl5/gyIBHRxD+avoqpQX/17EmrFsbMaG8IM0ZWB2lSDt45sDvpmSlcTjzrHIEGoBbOzkOefkC\\n'\\
+ ... 'gYEAgmS5bxSz45teBjLsNuRCOGYVcdX6krFXq03LqGaeWdl6CJwcPo/bGEWZBQbM86/6fYNcw4V2\\n'\\
+ ... 'sSQGEuuQRtWQj6ogJMzd7uQ7hhkZgvWlTPyIRLXloiIw1a9zV6tWiaujeOamRaLC6AawdWikRbG9\\n'\\
+ ... 'BmrE8yFHZnY5sjQeL9q2dmECgYEAgp5w1NCirGCxUsHLTSmzf4tFlZ9FQxficjUNVBxIYJguLkny\\n'\\
+ ... '/Qka8xhuqJKgwlabQR7IlmIKV+7XXRWRx/mNGsJkFo791GhlE21iEmMLdEJcVAGX3X57BuGDhVrL\\n'\\
+ ... 'GuhX1dfGtn9e0ZqsfE7F9YWodfBMPGA/igK9dLsEQg2H5KECgYEAvlv0cPHP8wcOL3g9eWIVCXtg\\n'\\
+ ... 'aQ+KiDfk7pihLnHTJVZqXuy0lFD+O/TqxGOOQS/G4vBerrjzjCXXXxi2FN0kDJhiWlRHIQALl6rl\\n'\\
+ ... 'i2LdKfL1sk1IA5PYrj+LmBuOLpsMHnkoH+XRJWUJkLvowaJ0aSengQ2AD+icrc/EIrpcdjU=\\n'+\\
+ ... '-----END RSA PRIVATE KEY-----\\n'
+ >>> ec2_keypair_fingerprint(ssh_private_key)
+ 'ac:23:ae:c3:9a:a3:78:b1:0f:8a:31:dd:13:cc:b1:8e:fb:51:42:f8'
+ """
+ rsa_key = RSA.importKey( ssh_key )
+ is_private_key = rsa_key.has_private( )
+ if is_private_key and reject_private_keys:
+ raise ValueError( 'Private keys are disallowed' )
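+ # Two fingerprint schemes are in play: for an imported public key AWS uses the MD5 of the
+ # PKCS#1 DER encoding, while for a keypair generated by EC2 it uses the SHA-1 of the PKCS#8
+ # DER encoding of the private key. The pkcs and hash function choices below mirror that.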
+ der_rsa_key = rsa_key.exportKey( format='DER', pkcs=(8 if is_private_key else 1) )
+ key_hash = (hashlib.sha1 if is_private_key else hashlib.md5)( der_rsa_key )
+ return ':'.join( partition_seq( key_hash.hexdigest( ), 2 ) )
+
+
+def private_to_public_key( private_ssh_key ):
+ """
+ Returns the public key in OpenSSH format (as used in the authorized_keys file) for a given
+ private RSA key in PEM format.
+ >>> ssh_private_key = \\
+ ... '-----BEGIN RSA PRIVATE KEY-----\\n'+\\
+ ... 'MIIEpQIBAAKCAQEAi3shPK00+/6dwW8u+iDkUYiwIKl/lv0Ay5IstLszwb3CA4mVRlyq769HzE8f\\n'+\\
+ ... 'cnzQUX/NI8y9MTO0UNt2JDMJWW5L49jmvxV0TjxQjKg8KcNzYuHsEny3k8LxezWMsmwlrrC89O6e\\n'+\\
+ ... 'oo6boc8ForSdjVdIlJbvWu/82dThyFgTjWd5B+1O93xw8/ejqY9PfZExBeqpKjm58OUByTpVhvWe\\n'+\\
+ ... 'jmbZ9BL60XJhwz9bDTrlKpjcGsMZ74G6XfQAhyyqXYeD/XOercCSJgQ/QjYKcPE9yMRyucHyuYZ8\\n'+\\
+ ... 'HKzmG+u4p5ffnFb43tKzWCI330JQcklhGTldyqQHDWA41mT1QMoWfwIDAQABAoIBAF50gryRWykv\\n'+\\
+ ... 'cuuUfI6ciaGBXCyyPBomuUwicC3v/Au+kk1M9Y7RoFxyKb/88QHZ7kTStDwDITfZmMmM5QN8oF80\\n'+\\
+ ... 'pyXkM9bBE6MLi0zFfQCXQGN9NR4L4VGqGVfjmqUVQat8Omnv0fOpeVFpXZqij3Mw4ZDmaa7+iA+H\\n'+\\
+ ... '72J56ru9i9wcBNqt//Kh5BXARekp7tHzklYrlqJd03ftDRp9GTBIFAsaPClTBpnPVhwD/rAoJEhb\\n'+\\
+ ... 'KM9g/EMjQ28cUMQSHSwOyi9Rg/LtwFnER4u7pnBz2tbJFvLlXE96IQbksQL6/PTJ9H6Zpp+1fDcI\\n'+\\
+ ... 'k/MKSQZtQOgfV8V1wlvHX+Q0bxECgYEA4LHj6o4usINnSy4cf6BRLrCA9//ePa8UjEK2YDC5rQRV\\n'+\\
+ ... 'huFWqWJJSjWI9Ofjh8mZj8NvTJa9RW4d4Rn6F7upOuAer9obwfrmi4BEQSbvUwxQIuHOZ6itH/0L\\n'+\\
+ ... 'klqQBuhJeyr3W+2IhudJUQz9MEoddOfYIybXqkF7XzDl2x6FcjcCgYEAnunySmjt+983gUKK9DgK\\n'+\\
+ ... '/k1ki41jCAcFlGd8MbLEWkJpwt3FJFiyq6vVptoVH8MBnVAOjDneP6YyNBv5+zm3vyMuVJtKNcAP\\n'+\\
+ ... 'MAxrl5/gyIBHRxD+avoqpQX/17EmrFsbMaG8IM0ZWB2lSDt45sDvpmSlcTjzrHIEGoBbOzkOefkC\\n'+\\
+ ... 'gYEAgmS5bxSz45teBjLsNuRCOGYVcdX6krFXq03LqGaeWdl6CJwcPo/bGEWZBQbM86/6fYNcw4V2\\n'+\\
+ ... 'sSQGEuuQRtWQj6ogJMzd7uQ7hhkZgvWlTPyIRLXloiIw1a9zV6tWiaujeOamRaLC6AawdWikRbG9\\n'+\\
+ ... 'BmrE8yFHZnY5sjQeL9q2dmECgYEAgp5w1NCirGCxUsHLTSmzf4tFlZ9FQxficjUNVBxIYJguLkny\\n'+\\
+ ... '/Qka8xhuqJKgwlabQR7IlmIKV+7XXRWRx/mNGsJkFo791GhlE21iEmMLdEJcVAGX3X57BuGDhVrL\\n'+\\
+ ... 'GuhX1dfGtn9e0ZqsfE7F9YWodfBMPGA/igK9dLsEQg2H5KECgYEAvlv0cPHP8wcOL3g9eWIVCXtg\\n'+\\
+ ... 'aQ+KiDfk7pihLnHTJVZqXuy0lFD+O/TqxGOOQS/G4vBerrjzjCXXXxi2FN0kDJhiWlRHIQALl6rl\\n'+\\
+ ... 'i2LdKfL1sk1IA5PYrj+LmBuOLpsMHnkoH+XRJWUJkLvowaJ0aSengQ2AD+icrc/EIrpcdjU=\\n'+\\
+ ... '-----END RSA PRIVATE KEY-----\\n'
+ >>> ssh_pubkey = 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCLeyE8rTT7/p3Bby76IORRiLA'\\
+ ... 'gqX+W/QDLkiy0uzPBvcIDiZVGXKrvr0fMTx9yfNBRf80jzL0xM7RQ23YkMwlZbkvj2Oa/FXROPFC'\\
+ ... 'MqDwpw3Ni4ewSfLeTwvF7NYyybCWusLz07p6ijpuhzwWitJ2NV0iUlu9a7/zZ1OHIWBONZ3kH7U7'\\
+ ... '3fHDz96Opj099kTEF6qkqObnw5QHJOlWG9Z6OZtn0EvrRcmHDP1sNOuUqmNwawxnvgbpd9ACHLKp'\\
+ ... 'dh4P9c56twJImBD9CNgpw8T3IxHK5wfK5hnwcrOYb67inl9+cVvje0rNYIjffQlBySWEZOV3KpAc'\\
+ ... 'NYDjWZPVAyhZ/'
+ >>> private_to_public_key(ssh_private_key) == ssh_pubkey
+ True
+ """
+ rsa_key = RSA.importKey( private_ssh_key )
+ if rsa_key.has_private( ):
+ return rsa_key.publickey( ).exportKey( format='OpenSSH' )
+ else:
+ raise ValueError( 'Expected private key' )
+
+
+def volume_label_hash( s ):
+ """
+ Linux volume labels are typically limited to 12 or 16 characters while the strings we want to
+ use for them are longer, usually a namespaced role name with additional data at the end. This
+ hash function returns an 11-character string that is reasonably representative of the input
+ string.
+
+ >>> volume_label_hash( 'hannes_spark-master__0' )
+ 'i0u77fnocoo'
+ >>> volume_label_hash( '' )
+ 'PZ2FQWP48Ho'
+ >>> volume_label_hash( ' ' )
+ 'oIf03JUELnY'
+ >>> volume_label_hash( '1' )
+ 'yQYSos_Mpxk'
+ """
+ h = hashlib.md5( s )
+ h = h.digest( )
+ assert len( h ) == 16
+ hi, lo = struct.unpack( '!QQ', h )
+ h = hi ^ lo
+ h = struct.pack( '!Q', h )
+ assert len( h ) == 8
+ h = base64.urlsafe_b64encode( h )
+ assert h[ -1 ] == '='
+ return h[ :-1 ]
+
+
+def prefix_lines( text, prefix ):
+ """
+ Prefix each non-empty line in the given text with the given prefix.
+
+ >>> prefix_lines('',' ')
+ ''
+ >>> prefix_lines(' ',' ')
+ ' '
+ >>> prefix_lines('\\n',' ')
+ '\\n'
+ >>> prefix_lines('x',' ')
+ ' x'
+ >>> prefix_lines('x\\n',' ')
+ ' x\\n'
+ >>> prefix_lines('x\\ny\\n', ' ' )
+ ' x\\n y\\n'
+ >>> prefix_lines('x\\ny', ' ' )
+ ' x\\n y'
+ """
+ return '\n'.join( prefix + l if l else l for l in text.split( '\n' ) )
+
+
+def heredoc( s, indent=None ):
+ """
+ Here-documents [1] for Python. Unindents the given string and interpolates format()-like
+ placeholders with local variables from the calling method's stack frame. The interpolation
+ part is a bit like black magic but it is tremendously useful.
+
+ [1]: https://en.wikipedia.org/wiki/Here_document
+
+ >>> x, y = 42, 7
+ >>> heredoc( '''
+ ... x is {x}
+ ... y is {y}
+ ... ''' )
+ 'x is 42\\ny is 7\\n'
+ """
+ if s[ 0 ] == '\n': s = s[ 1: ]
+ if s[ -1 ] != '\n': s += '\n'
+ s = dedent( s )
+ if indent is not None:
+ s = prefix_lines( s, indent )
+ return interpolate( s, skip_frames=1 )
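+
+# Illustrative sketch, not part of the upstream docstring: the optional indent argument runs
+# the unindented text through prefix_lines(), which helps when splicing a fragment into an
+# already indented shell or YAML block. Assuming a caller whose locals include x = 42:
+#
+# heredoc( '\necho {x}\n', indent='    ' ) # returns '    echo 42\n'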
+
+
+try:
+ # noinspection PyUnresolvedReferences
+ from concurrent.futures import ThreadPoolExecutor
+except ImportError:
+ # Fall back to the old implementation that uses the undocumented thread pool in
+ # multiprocessing. It does not allow interruption via Ctrl-C.
+ from contextlib import contextmanager
+
+
+ @contextmanager
+ def thread_pool( size ):
+ """
+ A context manager that yields a thread pool of the given size. On normal closing,
+ this context manager closes the pool and joins all threads in it. On exceptions, the pool
+ will be terminated but threads won't be joined.
+ """
+ pool = multiprocessing.pool.ThreadPool( processes=size )
+ try:
+ yield pool
+ except:
+ pool.terminate( )
+ raise
+ else:
+ pool.close( )
+ pool.join( )
+else:
+ # If the futures backport is installed, use that as it is documented and handles Ctrl-C more
+ # gracefully.
+ # noinspection PyPep8Naming
+ class thread_pool( object ):
+ """
+ A context manager that yields a thread pool of the given size. On normal closing,
+ this context manager closes the pool and joins all threads in it. On exceptions, the pool
+ will be terminated but threads won't be joined.
+ """
+
+ def __init__( self, size ):
+ self.executor = ThreadPoolExecutor( size )
+
+ def __enter__( self ):
+ return self
+
+ # noinspection PyUnusedLocal
+ def __exit__( self, exc_type, exc_val, exc_tb ):
+ self.executor.shutdown( wait=exc_type is None )
+
+ def apply_async( self, fn, args, callback=None ):
+ future = self.executor.submit( fn, *args )
+ if callback is not None:
+ future.add_done_callback( lambda f: callback( f.result( ) ) )
+
+ def map( self, fn, iterable ):
+ return list( self.executor.map( fn, iterable ) )
+
+
+def pmap( f, seq, pool_size=cores ):
+ """
+ Apply the given function to each element of the given sequence and return a sequence of the
+ result of each function application. Do so in parallel, using a thread pool no larger than
+ the given size.
+
+ :param callable f: the function to be applied
+
+ :param Sequence seq: the input sequence
+
+ :param int pool_size: the desired pool size; if absent, the number of CPU cores will be used.
+ The actual pool size may be smaller if the input sequence is small. A pool size of 0 will
+ make this function behave exactly like the map() builtin, i.e. the function will be applied
+ serially in the current thread.
+
+ >>> pmap( lambda (a, b): a + b, [], pool_size=0 )
+ []
+ >>> pmap( lambda (a, b): a + b, [ (1, 2) ], pool_size=0 )
+ [3]
+ >>> pmap( lambda (a, b): a + b, [ (1, 2), (3, 4) ], pool_size=0 )
+ [3, 7]
+ >>> pmap( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=0 )
+ Traceback (most recent call last):
+ ...
+ TypeError: <lambda>() takes exactly 2 arguments (1 given)
+ >>> pmap( lambda (a, b): a + b, [], pool_size=1 )
+ []
+ >>> pmap( lambda (a, b): a + b, [ (1, 2) ], pool_size=1 )
+ [3]
+ >>> pmap( lambda (a, b): a + b, [ (1, 2), (3, 4) ], pool_size=1 )
+ [3, 7]
+ >>> pmap( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=1 )
+ Traceback (most recent call last):
+ ...
+ TypeError: <lambda>() takes exactly 2 arguments (1 given)
+ >>> pmap( lambda (a, b): a + b, [], pool_size=2 )
+ []
+ >>> pmap( lambda (a, b): a + b, [ (1, 2) ], pool_size=2 )
+ [3]
+ >>> pmap( lambda (a, b): a + b, [ (1, 2), (3, 4) ], pool_size=2 )
+ [3, 7]
+ >>> pmap( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=2 )
+ Traceback (most recent call last):
+ ...
+ TypeError: <lambda>() takes exactly 2 arguments (1 given)
+ """
+ __check_pool_size( pool_size )
+ n = len( seq )
+ if n:
+ if pool_size == 0:
+ return map( f, seq )
+ else:
+ with thread_pool( min( pool_size, n ) ) as pool:
+ return pool.map( f, seq )
+ else:
+ return [ ]
+
+
+def papply( f, seq, pool_size=cores, callback=None ):
+ """
+ Apply the given function to each element of the given sequence, optionally invoking the given
+ callback with the result of each application. Do so in parallel, using a thread pool no
+ larger than the given size.
+
+ :param callable f: the function to be applied
+
+ :param Sequence seq: the input sequence
+
+ :param int pool_size: the desired pool size; if absent, the number of CPU cores will be used.
+ The actual pool size may be smaller if the input sequence is small. A pool size of 0 will make
+ this function emulate the apply() builtin, i.e. f (and the callback, if provided) will be
+ invoked serially in the current thread.
+
+ :param callable callback: an optional function to be invoked with the return value of f
+
+ >>> l=[]; papply( lambda a, b: a + b, [], pool_size=0, callback=l.append ); l
+ []
+ >>> l=[]; papply( lambda a, b: a + b, [ (1, 2) ], pool_size=0, callback=l.append); l
+ [3]
+ >>> l=[]; papply( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=0, callback=l.append ); l
+ [3, 7]
+ >>> l=[]; papply( lambda a, b: a + b, [], pool_size=1, callback=l.append ); l
+ []
+ >>> l=[]; papply( lambda a, b: a + b, [ (1, 2) ], pool_size=1, callback=l.append); l
+ [3]
+ >>> l=[]; papply( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=1, callback=l.append ); l
+ [3, 7]
+ >>> l=[]; papply( lambda a, b: a + b, [], pool_size=2, callback=l.append ); l
+ []
+ >>> l=[]; papply( lambda a, b: a + b, [ (1, 2) ], pool_size=2, callback=l.append); l
+ [3]
+ >>> l=[]; papply( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=2, callback=l.append ); l
+ [3, 7]
+ """
+ __check_pool_size( pool_size )
+ n = len( seq )
+ if n:
+ if pool_size == 0:
+ for args in seq:
+ result = apply( f, args )
+ if callback is not None:
+ callback( result )
+ else:
+ with thread_pool( min( pool_size, n ) ) as pool:
+ for args in seq:
+ pool.apply_async( f, args, callback=callback )
+
+
+def __check_pool_size( pool_size ):
+ if pool_size < 0:
+ raise ValueError( 'Pool size must be >= 0' )
+
+
+def allocate_cluster_ordinals( num, used ):
+ """
+ Return an iterator containing a given number of unused cluster ordinals. The result is
+ guaranteed to yield each ordinal exactly once, i.e. the result is set-like. The argument
+ set and the result iterator will be disjoint. The sum of all ordinals in the argument and
+ the result is guaranteed to be minimal, i.e. the function will first fill the gaps in the
+ argument before allocating higher values. The result will yield ordinals in ascending order.
+
+ :param int num: the number of ordinals to allocate
+ :param set[int] used: a set of currently used ordinals
+ :rtype: iterator
+
+ >>> f = allocate_cluster_ordinals
+
+ >>> list(f(0,set()))
+ []
+ >>> list(f(1,set()))
+ [0]
+ >>> list(f(0,{0}))
+ []
+ >>> list(f(1,{0}))
+ [1]
+ >>> list(f(0,{0,1}))
+ []
+ >>> list(f(1,{0,1}))
+ [2]
+ >>> list(f(0,{0,2}))
+ []
+ >>> list(f(1,{0,2}))
+ [1]
+ >>> list(f(2,{0,2}))
+ [1, 3]
+ >>> list(f(3,{0,2}))
+ [1, 3, 4]
+ """
+ assert isinstance( used, set )
+ first_free = max( used ) + 1 if used else 0
+ complete = set( range( 0, len( used ) ) )
+ gaps = sorted( complete - used )
+ return islice( concat( gaps, count( first_free ) ), num )
diff --git a/lib/src/cgcloud_Crypto/Hash/MD5.py b/lib/src/cgcloud_Crypto/Hash/MD5.py
new file mode 100644
index 0000000..1d74346
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/Hash/MD5.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+
+"""MD5 cryptographic hash algorithm.
+
+MD5 is specified in RFC1321_ and produces a 128-bit digest of a message.
+
+ >>> from cgcloud_Crypto.Hash import MD5
+ >>>
+ >>> h = MD5.new()
+ >>> h.update(b'Hello')
+ >>> print h.hexdigest()
+
+MD5 stands for Message Digest version 5, and it was invented by Rivest in 1991.
+
+This algorithm is insecure. Do not use it for new designs.
+
+.. _RFC1321: http://tools.ietf.org/html/rfc1321
+"""
+
+from __future__ import nested_scopes
+
+__revision__ = "$Id$"
+
+__all__ = ['new', 'block_size', 'digest_size']
+
+from cgcloud_Crypto.Util.py3compat import *
+if sys.version_info[0] == 2 and sys.version_info[1] == 1:
+ from cgcloud_Crypto.Util.py21compat import *
+
+def __make_constructor():
+ try:
+ # The md5 module is deprecated in Python 2.6, so use hashlib when possible.
+ from hashlib import md5 as _hash_new
+ except ImportError:
+ from md5 import new as _hash_new
+
+ h = _hash_new()
+ if hasattr(h, 'new') and hasattr(h, 'name') and hasattr(h, 'digest_size') and hasattr(h, 'block_size'):
+ # The module from stdlib has the API that we need. Just use it.
+ return _hash_new
+ else:
+ # Wrap the hash object in something that gives us the expected API.
+ _copy_sentinel = object()
+ class _MD5(object):
+ digest_size = 16
+ block_size = 64
+ name = "md5"
+ def __init__(self, *args):
+ if args and args[0] is _copy_sentinel:
+ self._h = args[1]
+ else:
+ self._h = _hash_new(*args)
+ def copy(self):
+ return _MD5(_copy_sentinel, self._h.copy())
+ def update(self, *args):
+ f = self.update = self._h.update
+ f(*args)
+ def digest(self):
+ f = self.digest = self._h.digest
+ return f()
+ def hexdigest(self):
+ f = self.hexdigest = self._h.hexdigest
+ return f()
+ _MD5.new = _MD5
+ return _MD5
+
+new = __make_constructor()
+del __make_constructor
+
+#: The size of the resulting hash in bytes.
+digest_size = new().digest_size
+
+#: The internal block size of the hash algorithm in bytes.
+block_size = new().block_size
diff --git a/lib/src/cgcloud_Crypto/Hash/__init__.py b/lib/src/cgcloud_Crypto/Hash/__init__.py
new file mode 100644
index 0000000..9c4ecbe
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/Hash/__init__.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+
+"""Hashing algorithms
+
+Hash functions take arbitrary binary strings as input, and produce a random-like output
+of fixed size that is dependent on the input; it should be practically infeasible
+to derive the original input data given only the hash function's
+output. In other words, the hash function is *one-way*.
+
+It should also not be practically feasible to find a second piece of data
+(a *second pre-image*) whose hash is the same as the original message
+(*weak collision resistance*).
+
+Finally, it should not be feasible to find two arbitrary messages with the
+same hash (*strong collision resistance*).
+
+The output of the hash function is called the *digest* of the input message.
+In general, the security of a hash function is related to the length of the
+digest. If the digest is *n* bits long, its security level is roughly comparable
+to the one offered by an *n/2* bit encryption algorithm.
+
+Hash functions can be used simply as an integrity check, or, in
+association with a public-key algorithm, can be used to implement
+digital signatures.
+
+The hashing modules here all support the interface described in `PEP
+247`_ , "API for Cryptographic Hash Functions".
+
+.. _`PEP 247` : http://www.python.org/dev/peps/pep-0247/
+
+:undocumented: _MD2, _MD4, _RIPEMD160, _SHA224, _SHA256, _SHA384, _SHA512
+"""
+
+__all__ = [ 'MD5' ]
+
+__revision__ = "$Id$"
+
+import sys
+if sys.version_info[0] == 2 and sys.version_info[1] == 1:
+ from cgcloud_Crypto.Util.py21compat import *
+from cgcloud_Crypto.Util.py3compat import *
+
+def new(algo, *args):
+ """Initialize a new hash object.
+
+ The first argument to this function may be an algorithm name or another
+ hash object.
+
+ This function has significant overhead. It's recommended that you instead
+ import and use the individual hash modules directly.
+ """
+
+ # Try just invoking algo.new()
+ # We do this first so that this is the fastest.
+ try:
+ new_func = algo.new
+ except AttributeError:
+ pass
+ else:
+ return new_func(*args)
+
+ # Try getting the algorithm name.
+ if isinstance(algo, str):
+ name = algo
+ else:
+ try:
+ name = algo.name
+ except AttributeError:
+ raise ValueError("unsupported hash type %r" % (algo,))
+
+ # Got the name. Let's see if we have a PyCrypto implementation.
+ try:
+ new_func = _new_funcs[name]
+ except KeyError:
+ # No PyCrypto implementation. Try hashlib.
+ try:
+ import hashlib
+ except ImportError:
+ # There is no hashlib.
+ raise ValueError("unsupported hash type %s" % (name,))
+ return hashlib.new(name, *args)
+ else:
+ # We have a PyCrypto implementation. Instantiate it.
+ return new_func(*args)
+
+# This dict originally gets the following _*_new methods, but its members get
+# replaced with the real new() methods of the various hash modules as they are
+# used. We do it without locks to improve performance, which is safe in
+# CPython because dict access there is atomic. This might break PyPy.
+_new_funcs = {}
+
+def _md5_new(*args):
+ from cgcloud_Crypto.Hash import MD5
+ _new_funcs['MD5'] = _new_funcs['md5'] = MD5.new
+ return MD5.new(*args)
+_new_funcs['MD5'] = _new_funcs['md5'] = _md5_new
+del _md5_new
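+
+# Illustrative sketch, not part of the upstream module (assumes hashlib is available, as it
+# is on any supported interpreter): the generic constructor and the MD5 module give the same
+# result, e.g.
+#
+# from cgcloud_Crypto.Hash import MD5, new
+# new('md5', b'abc').hexdigest() == MD5.new(b'abc').hexdigest() # True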
diff --git a/lib/src/cgcloud_Crypto/IO/PEM.py b/lib/src/cgcloud_Crypto/IO/PEM.py
new file mode 100644
index 0000000..2a7769b
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/IO/PEM.py
@@ -0,0 +1,90 @@
+# -*- coding: ascii -*-
+#
+# Util/PEM.py : Privacy Enhanced Mail utilities
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+"""Set of functions for encapsulating data according to the PEM format.
+
+PEM (Privacy Enhanced Mail) was an IETF standard for securing emails via a
+Public Key Infrastructure. It is specified in RFC 1421-1424.
+
+Even though it has been abandoned, the simple message encapsulation it defined
+is still widely used today for encoding *binary* cryptographic objects like
+keys and certificates into text.
+"""
+
+__all__ = ['encode', 'decode']
+
+import sys
+if sys.version_info[0] == 2 and sys.version_info[1] == 1:
+ from cgcloud_Crypto.Util.py21compat import *
+from cgcloud_Crypto.Util.py3compat import *
+
+import re
+from binascii import hexlify, unhexlify, a2b_base64, b2a_base64
+
+from cgcloud_Crypto.Hash import MD5
+
+
+def decode(pem_data, passphrase=None):
+ """Decode a PEM block into binary.
+
+ :Parameters:
+ pem_data : string
+ The PEM block.
+ passphrase : byte string
+ If given and the PEM block is encrypted,
+ the key will be derived from the passphrase.
+ :Returns:
+ A tuple with the binary data, the marker string, and a boolean to
+ indicate if decryption was performed.
+ :Raises ValueError:
+ If decoding fails, if the PEM file is encrypted and no passphrase has
+ been provided or if the passphrase is incorrect.
+ """
+
+ # Verify Pre-Encapsulation Boundary
+ r = re.compile("\s*-----BEGIN (.*)-----\n")
+ m = r.match(pem_data)
+ if not m:
+ raise ValueError("Not a valid PEM pre boundary")
+ marker = m.group(1)
+
+ # Verify Post-Encapsulation Boundary
+ r = re.compile("-----END (.*)-----\s*$")
+ m = r.search(pem_data)
+ if not m or m.group(1) != marker:
+ raise ValueError("Not a valid PEM post boundary")
+
+ # Remove spaces and split into lines
+ lines = pem_data.replace(" ", '').split()
+
+ # Decrypts, if necessary
+ if lines[1].startswith('Proc-Type:4,ENCRYPTED'):
+ assert False # code deleted
+ else:
+ objdec = None
+
+ # Decode body
+ data = a2b_base64(b(''.join(lines[1:-1])))
+ enc_flag = False
+ if objdec:
+ assert False # code deleted
+
+ return (data, marker, enc_flag)
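+
+# Illustrative sketch, not part of the upstream module: for the unencrypted keys handled by
+# cgcloud, decode() yields the raw DER payload, the marker from the BEGIN/END lines and
+# False. With a hypothetical PKCS#1 private key file 'mykey.pem':
+#
+# der, marker, encrypted = decode(open('mykey.pem').read())
+# # marker == 'RSA PRIVATE KEY', encrypted is False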
diff --git a/lib/src/cgcloud_Crypto/IO/PKCS8.py b/lib/src/cgcloud_Crypto/IO/PKCS8.py
new file mode 100644
index 0000000..0d224c9
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/IO/PKCS8.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+#
+# PublicKey/PKCS8.py : PKCS#8 functions
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+"""
+Module for handling private keys wrapped according to `PKCS#8`_.
+
+PKCS8 is a standard for storing and transferring private key information.
+The wrapped key can be either clear or encrypted.
+
+All encryption algorithms are based on passphrase-based key derivation.
+The following mechanisms are fully supported:
+
+* *PBKDF2WithHMAC-SHA1AndAES128-CBC*
+* *PBKDF2WithHMAC-SHA1AndAES192-CBC*
+* *PBKDF2WithHMAC-SHA1AndAES256-CBC*
+* *PBKDF2WithHMAC-SHA1AndDES-EDE3-CBC*
+
+The following mechanisms are only supported for importing keys.
+They are much weaker than the ones listed above, and they are provided
+for backward compatibility only:
+
+* *pbeWithMD5AndRC2-CBC*
+* *pbeWithMD5AndDES-CBC*
+* *pbeWithSHA1AndRC2-CBC*
+* *pbeWithSHA1AndDES-CBC*
+
+.. _`PKCS#8`: http://www.ietf.org/rfc/rfc5208.txt
+
+"""
+
+import sys
+
+if sys.version_info[0] == 2 and sys.version_info[1] == 1:
+ from cgcloud_Crypto.Util.py21compat import *
+from cgcloud_Crypto.Util.py3compat import *
+
+from cgcloud_Crypto.Util.asn1 import *
+
+# from Crypto.IO._PBES import PBES1, PBES2
+
+__all__ = ['wrap', 'unwrap']
+
+
+def decode_der(obj_class, binstr):
+ """Instantiate a DER object class, decode a DER binary string in it, and
+ return the object."""
+ der = obj_class()
+ der.decode(binstr)
+ return der
+
+
+def wrap(private_key, key_oid, passphrase=None, protection=None,
+ prot_params=None, key_params=None, randfunc=None):
+ """Wrap a private key into a PKCS#8 blob (clear or encrypted).
+
+ :Parameters:
+
+ private_key : byte string
+ The private key encoded in binary form. The actual encoding is
+ algorithm specific. In most cases, it is DER.
+
+ key_oid : string
+ The object identifier (OID) of the private key to wrap.
+ It is a dotted string, like "``1.2.840.113549.1.1.1``" (for RSA keys).
+
+ passphrase : (binary) string
+ The secret passphrase from which the wrapping key is derived.
+ Set it only if encryption is required.
+
+ protection : string
+ The identifier of the algorithm to use for securely wrapping the key.
+ The default value is '``PBKDF2WithHMAC-SHA1AndDES-EDE3-CBC``'.
+
+ prot_params : dictionary
+ Parameters for the protection algorithm.
+
+ +------------------+-----------------------------------------------+
+ | Key | Description |
+ +==================+===============================================+
+ | iteration_count | The KDF algorithm is repeated several times to|
+ | | slow down brute force attacks on passwords. |
+ | | The default value is 1 000. |
+ +------------------+-----------------------------------------------+
+ | salt_size | Salt is used to thwart dictionary and rainbow |
+ | | attacks on passwords. The default value is 8 |
+ | | bytes. |
+ +------------------+-----------------------------------------------+
+
+ key_params : DER object
+ The algorithm parameters associated to the private key.
+ It is required for algorithms like DSA, but not for others like RSA.
+
+ randfunc : callable
+ Random number generation function; it should accept a single integer
+ N and return a string of random data, N bytes long.
+ If not specified, a new RNG will be instantiated
+ from ``Crypto.Random``.
+
+ :Return:
+ The PKCS#8-wrapped private key (possibly encrypted),
+ as a binary string.
+ """
+
+ if key_params is None:
+ key_params = DerNull()
+
+ #
+ # PrivateKeyInfo ::= SEQUENCE {
+ # version Version,
+ # privateKeyAlgorithm PrivateKeyAlgorithmIdentifier,
+ # privateKey PrivateKey,
+ # attributes [0] IMPLICIT Attributes OPTIONAL
+ # }
+ #
+ pk_info = newDerSequence(
+ 0,
+ newDerSequence(
+ DerObjectId(key_oid),
+ key_params
+ ),
+ newDerOctetString(private_key)
+ )
+ pk_info_der = pk_info.encode()
+
+ if not passphrase:
+ return pk_info_der
+
+ assert False # code deleted
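+
+# Illustrative sketch, not part of the upstream module: only the unencrypted path above is
+# exercised here, i.e. wrapping a PKCS#1 RSAPrivateKey DER blob (a hypothetical variable
+# rsa_private_key_der) into a clear PKCS#8 PrivateKeyInfo structure:
+#
+# pkcs8_der = wrap(rsa_private_key_der, "1.2.840.113549.1.1.1") # no passphrase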
diff --git a/lib/src/cgcloud_Crypto/IO/__init__.py b/lib/src/cgcloud_Crypto/IO/__init__.py
new file mode 100644
index 0000000..86776c4
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/IO/__init__.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+
+"""
+Modules for reading and writing cryptographic data.
+
+======================== =============================================
+Module Description
+======================== =============================================
+Crypto.IO.PEM Set of functions for encapsulating data according to the PEM format.
+Crypto.IO.PKCS8 Set of functions for wrapping/unwrapping private keys.
+======================== =============================================
+"""
+
+__all__ = ['PEM', 'PKCS8']
diff --git a/lib/src/cgcloud_Crypto/PublicKey/RSA.py b/lib/src/cgcloud_Crypto/PublicKey/RSA.py
new file mode 100644
index 0000000..0c5e0ba
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/PublicKey/RSA.py
@@ -0,0 +1,503 @@
+# -*- coding: utf-8 -*-
+#
+# PublicKey/RSA.py : RSA public key primitive
+#
+# Written in 2008 by Dwayne C. Litzenberger <dlitz at dlitz.net>
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+
+"""RSA public-key cryptography algorithm (signature and encryption).
+
+RSA_ is the most widely used public-key algorithm. Its security is
+based on the difficulty of factoring large integers. The algorithm has
+withstood attacks for 30 years, and it is therefore considered reasonably
+secure for new designs.
+
+The algorithm can be used for both confidentiality (encryption) and
+authentication (digital signature). It is worth noting that signing and
+decryption are significantly slower than verification and encryption.
+The cryptographic strength is primarily linked to the length of the modulus *n*.
+In 2012, a sufficient length is deemed to be 2048 bits. For more information,
+see the most recent ECRYPT_ report.
+
+Both RSA ciphertext and RSA signature are as big as the modulus *n* (256
+bytes if *n* is 2048 bits long).
+
+This module provides facilities for generating fresh, new RSA keys, constructing
+them from known components, exporting them, and importing them.
+
+ >>> from cgcloud_Crypto.PublicKey import RSA
+ >>>
+ >>> key = RSA.generate(2048)
+ >>> f = open('mykey.pem','w')
+ >>> f.write(key.exportKey('PEM'))
+ >>> f.close()
+ ...
+ >>> f = open('mykey.pem','r')
+ >>> key = RSA.importKey(f.read())
+
+Even though you may choose to directly use the methods of an RSA key object
+to perform the primitive cryptographic operations (e.g. `_RSAobj.encrypt`),
+it is recommended to use one of the standardized schemes instead (like
+`Crypto.Cipher.PKCS1_v1_5` or `Crypto.Signature.PKCS1_v1_5`).
+
+.. _RSA: http://en.wikipedia.org/wiki/RSA_%28algorithm%29
+.. _ECRYPT: http://www.ecrypt.eu.org/documents/D.SPA.17.pdf
+
+:sort: generate,construct,importKey,error
+"""
+
+__revision__ = "$Id$"
+
+__all__ = ['generate', 'construct', 'error', 'importKey', 'RSAImplementation',
+ '_RSAobj', 'oid' , 'algorithmIdentifier' ]
+
+import sys
+if sys.version_info[0] == 2 and sys.version_info[1] == 1:
+ from cgcloud_Crypto.Util.py21compat import *
+from cgcloud_Crypto.Util.py3compat import *
+
+from cgcloud_Crypto.Util.number import bytes_to_long, long_to_bytes
+
+from cgcloud_Crypto.PublicKey import _slowmath
+from cgcloud_Crypto.IO import PKCS8, PEM
+
+from cgcloud_Crypto.Util.asn1 import *
+
+import binascii
+import struct
+
+from cgcloud_Crypto.Util.number import inverse
+
+try:
+ from cgcloud_Crypto.PublicKey import _fastmath
+except ImportError:
+ _fastmath = None
+
+def decode_der(obj_class, binstr):
+ """Instantiate a DER object class, decode a DER binary string in it, and
+ return the object."""
+ der = obj_class()
+ der.decode(binstr)
+ return der
+
+class _RSAobj():
+ """Class defining an actual RSA key.
+
+ :undocumented: __getstate__, __setstate__, __repr__, __getattr__
+ """
+ #: Dictionary of RSA parameters.
+ #:
+ #: A public key will only have the following entries:
+ #:
+ #: - **n**, the modulus.
+ #: - **e**, the public exponent.
+ #:
+ #: A private key will also have:
+ #:
+ #: - **d**, the private exponent.
+ #: - **p**, the first factor of n.
+ #: - **q**, the second factor of n.
+ #: - **u**, the CRT coefficient (1/p) mod q.
+ keydata = ['n', 'e', 'd', 'p', 'q', 'u']
+
+ def __init__(self, implementation, key, randfunc=None):
+ self.implementation = implementation
+ self.key = key
+ # if randfunc is None:
+ # randfunc = Random.new().read
+ # self._randfunc = randfunc
+
+ def __getattr__(self, attrname):
+ if attrname in self.keydata:
+ # For backward compatibility, allow the user to get (not set) the
+ # RSA key parameters directly from this object.
+ return getattr(self.key, attrname)
+ else:
+ raise AttributeError("%s object has no %r attribute" % (self.__class__.__name__, attrname,))
+
+ def has_private(self):
+ return self.key.has_private()
+
+ def size(self):
+ return self.key.size()
+
+ def can_blind(self):
+ return True
+
+ def can_encrypt(self):
+ return True
+
+ def can_sign(self):
+ return True
+
+ def publickey(self):
+ return self.implementation.construct((self.key.n, self.key.e))
+
+ def exportKey(self, format='PEM', passphrase=None, pkcs=1, protection=None):
+ """Export this RSA key.
+
+ :Parameters:
+ format : string
+ The format to use for wrapping the key:
+
+ - *'DER'*. Binary encoding.
+ - *'PEM'*. Textual encoding, done according to `RFC1421`_/`RFC1423`_.
+ - *'OpenSSH'*. Textual encoding, done according to OpenSSH specification.
+ Only suitable for public keys (not private keys).
+
+ passphrase : string
+ For private keys only. The pass phrase used for deriving the encryption
+ key.
+
+ pkcs : integer
+ For *DER* and *PEM* format only.
+ The PKCS standard to follow for assembling the components of the key.
+ You have two choices:
+
+ - **1** (default): the public key is embedded into
+ an X.509 ``SubjectPublicKeyInfo`` DER SEQUENCE.
+ The private key is embedded into a `PKCS#1`_
+ ``RSAPrivateKey`` DER SEQUENCE.
+ - **8**: the private key is embedded into a `PKCS#8`_
+ ``PrivateKeyInfo`` DER SEQUENCE. This value cannot be used
+ for public keys.
+
+ protection : string
+ The encryption scheme to use for protecting the private key.
+
+ If ``None`` (default), the behavior depends on ``format``:
+
+ - For *DER*, the *PBKDF2WithHMAC-SHA1AndDES-EDE3-CBC*
+ scheme is used. The following operations are performed:
+
+ 1. A 16 byte Triple DES key is derived from the passphrase
+ using `Crypto.Protocol.KDF.PBKDF2` with 8 bytes salt,
+ and 1 000 iterations of `Crypto.Hash.HMAC`.
+ 2. The private key is encrypted using CBC.
+ 3. The encrypted key is encoded according to PKCS#8.
+
+ - For *PEM*, the obsolete PEM encryption scheme is used.
+ It is based on MD5 for key derivation, and Triple DES for encryption.
+
+ Specifying a value for ``protection`` is only meaningful for PKCS#8
+ (that is, ``pkcs=8``) and only if a pass phrase is present too.
+
+ The supported schemes for PKCS#8 are listed in the
+ `Crypto.IO.PKCS8` module (see the ``protection`` parameter).
+
+ :Return: A byte string with the encoded public or private half
+ of the key.
+ :Raise ValueError:
+ When the format is unknown or when you try to encrypt a private
+ key with *DER* format and PKCS#1.
+ :attention:
+ If you don't provide a pass phrase, the private key will be
+ exported in the clear!
+
+ .. _RFC1421: http://www.ietf.org/rfc/rfc1421.txt
+ .. _RFC1423: http://www.ietf.org/rfc/rfc1423.txt
+ .. _`PKCS#1`: http://www.ietf.org/rfc/rfc3447.txt
+ .. _`PKCS#8`: http://www.ietf.org/rfc/rfc5208.txt
+ """
+ if passphrase is not None:
+ passphrase = tobytes(passphrase)
+ if format=='OpenSSH':
+ eb = long_to_bytes(self.e)
+ nb = long_to_bytes(self.n)
+ if bord(eb[0]) & 0x80: eb=bchr(0x00)+eb
+ if bord(nb[0]) & 0x80: nb=bchr(0x00)+nb
+ keyparts = [ b('ssh-rsa'), eb, nb ]
+ keystring = b('').join([ struct.pack(">I",len(kp))+kp for kp in keyparts])
+ return b('ssh-rsa ')+binascii.b2a_base64(keystring)[:-1]
+
+ # DER format is always used, even in case of PEM, which simply
+ # encodes it into BASE64.
+ if self.has_private():
+ binary_key = newDerSequence(
+ 0,
+ self.n,
+ self.e,
+ self.d,
+ self.p,
+ self.q,
+ self.d % (self.p-1),
+ self.d % (self.q-1),
+ inverse(self.q, self.p)
+ ).encode()
+ if pkcs==1:
+ keyType = 'RSA PRIVATE'
+ if format=='DER' and passphrase:
+ raise ValueError("PKCS#1 private key cannot be encrypted")
+ else: # PKCS#8
+ if format=='PEM' and protection is None:
+ keyType = 'PRIVATE'
+ binary_key = PKCS8.wrap(binary_key, oid, None)
+ else:
+ keyType = 'ENCRYPTED PRIVATE'
+ if not protection:
+ protection = 'PBKDF2WithHMAC-SHA1AndDES-EDE3-CBC'
+ binary_key = PKCS8.wrap(binary_key, oid, passphrase, protection)
+ passphrase = None
+ else:
+ keyType = "RSA PUBLIC"
+ binary_key = newDerSequence(
+ algorithmIdentifier,
+ newDerBitString(
+ newDerSequence( self.n, self.e )
+ )
+ ).encode()
+ if format=='DER':
+ return binary_key
+ if format=='PEM':
+ pem_str = PEM.encode(binary_key, keyType+" KEY", passphrase, self._randfunc)
+ return tobytes(pem_str)
+ raise ValueError("Unknown key format '%s'. Cannot export the RSA key." % format)
+
+class RSAImplementation(object):
+ """
+ An RSA key factory.
+
+ This class is used only internally to implement the functions of the `Crypto.PublicKey.RSA` module.
+
+ :sort: __init__,generate,construct,importKey
+ :undocumented: _g*, _i*
+ """
+
+ def __init__(self, **kwargs):
+ """Create a new RSA key factory.
+
+ :Keywords:
+ use_fast_math : bool
+ Specify which mathematic library to use:
+
+ - *None* (default). Use fastest math available.
+ - *True* . Use fast math.
+ - *False* . Use slow math.
+ default_randfunc : callable
+ Specify how to collect random data:
+
+ - *None* (default). Use Random.new().read().
+ - not *None* . Use the specified function directly.
+ :Raise RuntimeError:
+ When **use_fast_math** =True but fast math is not available.
+ """
+ use_fast_math = kwargs.get('use_fast_math', None)
+ if use_fast_math is None: # Automatic
+ if _fastmath is not None:
+ self._math = _fastmath
+ else:
+ self._math = _slowmath
+
+ elif use_fast_math: # Explicitly select fast math
+ if _fastmath is not None:
+ self._math = _fastmath
+ else:
+ raise RuntimeError("fast math module not available")
+
+ else: # Explicitly select slow math
+ self._math = _slowmath
+
+ self.error = self._math.error
+
+ self._default_randfunc = kwargs.get('default_randfunc', None)
+ self._current_randfunc = None
+
+ def construct(self, tup):
+ """Construct an RSA key from a tuple of valid RSA components.
+
+ The modulus **n** must be the product of two primes.
+ The public exponent **e** must be odd and larger than 1.
+
+ In case of a private key, the following equations must apply:
+
+ - e != 1
+ - p*q = n
+ - e*d = 1 mod (p-1)(q-1)
+ - p*u = 1 mod q
+
+ :Parameters:
+ tup : tuple
+ A tuple of long integers, with at least 2 and no
+ more than 6 items. The items come in the following order:
+
+ 1. RSA modulus (n).
+ 2. Public exponent (e).
+ 3. Private exponent (d). Only required if the key is private.
+ 4. First factor of n (p). Optional.
+ 5. Second factor of n (q). Optional.
+ 6. CRT coefficient, (1/p) mod q (u). Optional.
+
+ :Return: An RSA key object (`_RSAobj`).
+ """
+ key = self._math.rsa_construct(*tup)
+ return _RSAobj(self, key)
+
+ def _importKeyDER(self, extern_key, passphrase=None):
+ """Import an RSA key (public or private half), encoded in DER form."""
+
+ try:
+
+ der = decode_der(DerSequence, extern_key)
+
+ # Try PKCS#1 first, for a private key
+ if len(der) == 9 and der.hasOnlyInts() and der[0] == 0:
+ # ASN.1 RSAPrivateKey element
+ del der[6:] # Remove d mod (p-1),
+ # d mod (q-1), and
+ # q^{-1} mod p
+ der.append(inverse(der[4], der[5])) # Add p^{-1} mod q
+ del der[0] # Remove version
+ return self.construct(der[:])
+
+ # Keep on trying PKCS#1, but now for a public key
+ if len(der) == 2:
+ try:
+ # The DER object is an RSAPublicKey SEQUENCE with
+ # two elements
+ if der.hasOnlyInts():
+ return self.construct(der[:])
+ # The DER object is a SubjectPublicKeyInfo SEQUENCE
+ # with two elements: an 'algorithmIdentifier' and a
+ # 'subjectPublicKey'BIT STRING.
+ # 'algorithmIdentifier' takes the value given at the
+ # module level.
+ # 'subjectPublicKey' encapsulates the actual ASN.1
+ # RSAPublicKey element.
+ if der[0] == algorithmIdentifier:
+ bitmap = decode_der(DerBitString, der[1])
+ rsaPub = decode_der(DerSequence, bitmap.value)
+ if len(rsaPub) == 2 and rsaPub.hasOnlyInts():
+ return self.construct(rsaPub[:])
+ except (ValueError, EOFError):
+ pass
+
+ # Try PKCS#8 (possibly encrypted)
+ k = PKCS8.unwrap(extern_key, passphrase)
+ if k[0] == oid:
+ return self._importKeyDER(k[1], passphrase)
+
+ except (ValueError, EOFError):
+ pass
+
+ raise ValueError("RSA key format is not supported")
+
+ def importKey(self, extern_key, passphrase=None):
+ """Import an RSA key (public or private half), encoded in standard
+ form.
+
+ :Parameter extern_key:
+ The RSA key to import, encoded as a string.
+
+ An RSA public key can be in any of the following formats:
+
+ - X.509 ``subjectPublicKeyInfo`` DER SEQUENCE (binary or PEM
+ encoding)
+ - `PKCS#1`_ ``RSAPublicKey`` DER SEQUENCE (binary or PEM encoding)
+ - OpenSSH (textual public key only)
+
+ An RSA private key can be in any of the following formats:
+
+ - PKCS#1 ``RSAPrivateKey`` DER SEQUENCE (binary or PEM encoding)
+ - `PKCS#8`_ ``PrivateKeyInfo`` or ``EncryptedPrivateKeyInfo``
+ DER SEQUENCE (binary or PEM encoding)
+ - OpenSSH (textual public key only)
+
+ For details about the PEM encoding, see `RFC1421`_/`RFC1423`_.
+
+ The private key may be encrypted by means of a certain pass phrase
+ either at the PEM level or at the PKCS#8 level.
+ :Type extern_key: string
+
+ :Parameter passphrase:
+ In case of an encrypted private key, this is the pass phrase from
+ which the decryption key is derived.
+ :Type passphrase: string
+
+ :Return: An RSA key object (`_RSAobj`).
+
+ :Raise ValueError/IndexError/TypeError:
+ When the given key cannot be parsed (possibly because the pass
+ phrase is wrong).
+
+ .. _RFC1421: http://www.ietf.org/rfc/rfc1421.txt
+ .. _RFC1423: http://www.ietf.org/rfc/rfc1423.txt
+ .. _`PKCS#1`: http://www.ietf.org/rfc/rfc3447.txt
+ .. _`PKCS#8`: http://www.ietf.org/rfc/rfc5208.txt
+ """
+ extern_key = tobytes(extern_key)
+ if passphrase is not None:
+ passphrase = tobytes(passphrase)
+
+ if extern_key.startswith(b('-----')):
+ # This is probably a PEM encoded key.
+ (der, marker, enc_flag) = PEM.decode(tostr(extern_key), passphrase)
+ if enc_flag:
+ passphrase = None
+ return self._importKeyDER(der, passphrase)
+
+ if extern_key.startswith(b('ssh-rsa ')):
+ # This is probably an OpenSSH key
+ keystring = binascii.a2b_base64(extern_key.split(b(' '))[1])
+ keyparts = []
+ while len(keystring) > 4:
+ l = struct.unpack(">I", keystring[:4])[0]
+ keyparts.append(keystring[4:4 + l])
+ keystring = keystring[4 + l:]
+ e = bytes_to_long(keyparts[1])
+ n = bytes_to_long(keyparts[2])
+ return self.construct([n, e])
+
+ if bord(extern_key[0]) == 0x30:
+ # This is probably a DER encoded key
+ return self._importKeyDER(extern_key, passphrase)
+
+ raise ValueError("RSA key format is not supported")
+
+#: `Object ID`_ for the RSA encryption algorithm. This OID often indicates
+#: a generic RSA key, even when such a key will actually be used for digital
+#: signatures.
+#:
+#: .. _`Object ID`: http://www.alvestrand.no/objectid/1.2.840.113549.1.1.1.html
+oid = "1.2.840.113549.1.1.1"
+
+#: This is the standard DER object that qualifies a cryptographic algorithm
+#: in ASN.1-based data structures (e.g. X.509 certificates).
+algorithmIdentifier = DerSequence(
+ [DerObjectId(oid).encode(), # algorithm field
+ DerNull().encode()] # parameters field
+ ).encode()
+
+_impl = RSAImplementation()
+#:
+#: Construct an RSA key object from a tuple of valid RSA components.
+#:
+#: See `RSAImplementation.construct`.
+#:
+construct = _impl.construct
+#:
+#: Import an RSA key (public or private half), encoded in standard form.
+#:
+#: See `RSAImplementation.importKey`.
+#:
+importKey = _impl.importKey
+error = _impl.error
+
+# vim:set ts=4 sw=4 sts=4 expandtab:
+
diff --git a/lib/src/cgcloud_Crypto/PublicKey/__init__.py b/lib/src/cgcloud_Crypto/PublicKey/__init__.py
new file mode 100644
index 0000000..3a02b28
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/PublicKey/__init__.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+
+"""Public-key encryption and signature algorithms.
+
+Public-key encryption uses two different keys, one for encryption and
+one for decryption. The encryption key can be made public, and the
+decryption key is kept private. Many public-key algorithms can also
+be used to sign messages, and some can *only* be used for signatures.
+
+======================== =============================================
+Module Description
+======================== =============================================
+Crypto.PublicKey.DSA Digital Signature Algorithm (Signature only)
+Crypto.PublicKey.ElGamal (Signing and encryption)
+Crypto.PublicKey.RSA (Signing, encryption, and blinding)
+======================== =============================================
+
+:undocumented: _DSA, _RSA, _fastmath, _slowmath, pubkey
+"""
+
+__all__ = ['RSA']
+__revision__ = "$Id$"
+
diff --git a/lib/src/cgcloud_Crypto/PublicKey/_slowmath.py b/lib/src/cgcloud_Crypto/PublicKey/_slowmath.py
new file mode 100644
index 0000000..e17a54c
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/PublicKey/_slowmath.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+#
+# PubKey/RSA/_slowmath.py : Pure Python implementation of the RSA portions of _fastmath
+#
+# Written in 2008 by Dwayne C. Litzenberger <dlitz at dlitz.net>
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+
+"""Pure Python implementation of the RSA-related portions of Crypto.PublicKey._fastmath."""
+
+__revision__ = "$Id$"
+
+__all__ = ['rsa_construct']
+
+import sys
+
+if sys.version_info[0] == 2 and sys.version_info[1] == 1:
+ from cgcloud_Crypto.Util.py21compat import *
+from cgcloud_Crypto.Util.number import inverse
+
+class error(Exception):
+ pass
+
+class _RSAKey(object):
+ def has_private(self):
+ return hasattr(self, 'd')
+
+def rsa_construct(n, e, d=None, p=None, q=None, u=None):
+ """Construct an RSAKey object"""
+ assert isinstance(n, long)
+ assert isinstance(e, long)
+ assert isinstance(d, (long, type(None)))
+ assert isinstance(p, (long, type(None)))
+ assert isinstance(q, (long, type(None)))
+ assert isinstance(u, (long, type(None)))
+ obj = _RSAKey()
+ obj.n = n
+ obj.e = e
+ if d is None:
+ return obj
+ obj.d = d
+ if p is not None and q is not None:
+ obj.p = p
+ obj.q = q
+ else:
+ assert False # code deleted
+ if u is not None:
+ obj.u = u
+ else:
+ obj.u = inverse(obj.p, obj.q)
+ return obj
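+
+# Illustrative sketch with textbook numbers, not a real key: a public key only needs the
+# modulus and the public exponent, both as Python longs:
+#
+# pub = rsa_construct(3233L, 17L) # n = 61 * 53, e = 17
+# pub.has_private() # False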
+
+# vim:set ts=4 sw=4 sts=4 expandtab:
+
diff --git a/lib/src/cgcloud_Crypto/Util/__init__.py b/lib/src/cgcloud_Crypto/Util/__init__.py
new file mode 100644
index 0000000..b2030c0
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/Util/__init__.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+
+"""Miscellaneous modules
+
+Contains useful modules that don't belong in any of the
+other Crypto.* subpackages.
+
+======================== =============================================
+Module Description
+======================== =============================================
+`Crypto.Util.number` Number-theoretic functions (primality testing, etc.)
+`Crypto.Util.Counter` Fast counter functions for CTR cipher modes.
+`Crypto.Util.randpool` Random number generation
+`Crypto.Util.RFC1751` Converts between 128-bit keys and human-readable
+ strings of words.
+`Crypto.Util.asn1` Minimal support for ASN.1 DER encoding
+`Crypto.Util.Padding` Set of functions for adding and removing padding.
+======================== =============================================
+
+"""
+
+__all__ = ['randpool', 'RFC1751', 'number', 'strxor', 'asn1', 'Counter',
+ 'Padding' ]
+
+__revision__ = "$Id$"
+
diff --git a/lib/src/cgcloud_Crypto/Util/asn1.py b/lib/src/cgcloud_Crypto/Util/asn1.py
new file mode 100644
index 0000000..2753e4c
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/Util/asn1.py
@@ -0,0 +1,899 @@
+# -*- coding: ascii -*-
+#
+# Util/asn1.py : Minimal support for ASN.1 DER binary encoding.
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+""" ASN.1 DER encoding and decoding
+
+This module provides minimal support for encoding and decoding `ASN.1`_ DER
+objects.
+
+.. _`ASN.1`: ftp://ftp.rsasecurity.com/pub/pkcs/ascii/layman.asc
+
+"""
+
+from __future__ import nested_scopes
+
+import sys
+
+if sys.version_info[0] == 2 and sys.version_info[1] == 1:
+ from cgcloud_Crypto.Util.py21compat import *
+from cgcloud_Crypto.Util.py3compat import *
+if sys.version_info[0] == 2 and sys.version_info[1] == 1:
+ from cgcloud_Crypto.Util.py21compat import *
+
+from cgcloud_Crypto.Util.number import long_to_bytes, bytes_to_long
+
+__all__ = [ 'DerObject', 'DerInteger', 'DerOctetString', 'DerNull',
+ 'DerSequence', 'DerObjectId', 'DerBitString', 'DerSetOf',
+ 'newDerInteger', 'newDerOctetString', 'newDerSequence',
+ 'newDerObjectId', 'newDerBitString', 'newDerSetOf' ]
+
+def _isInt(x, onlyNonNegative=False):
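+    # Duck-typed check: adding x to an int raises TypeError unless x is a
+    # number, so no explicit isinstance() test is needed.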
+ test = 0
+ try:
+ test += x
+ except TypeError:
+ return False
+ return not onlyNonNegative or x>=0
+
+class BytesIO_EOF(BytesIO):
+ """This class differs from BytesIO in that an EOFError exception is
+ raised whenever EOF is reached."""
+
+ def __init__(self, *params):
+ BytesIO.__init__(self, *params)
+ self.setRecord(False)
+
+ def setRecord(self, record):
+ self._record = record
+ self._recording = b("")
+
+ def read(self, length):
+ s = BytesIO.read(self, length)
+ if len(s)<length:
+ raise EOFError
+ if self._record:
+ self._recording += s
+ return s
+
+ def read_byte(self):
+ return self.read(1)[0]
+
+class _NoDerElementError(EOFError):
+ pass
+
+class DerObject(object):
+ """Base class for defining a single DER object.
+
+ This class should never be directly instantiated.
+ """
+
+ def __init__(self, asn1Id=None, payload=b(''), implicit=None, constructed=False):
+ """Initialize the DER object according to a specific ASN.1 type.
+
+ :Parameters:
+ asn1Id : integer
+ The universal DER tag identifier for this object
+ (e.g. 0x10 for a SEQUENCE). If None, the tag is not known
+ yet.
+
+ payload : byte string
+ The initial payload of the object.
+ If not specified, the payload is empty.
+
+ implicit : integer
+ The IMPLICIT tag to use for the encoded object.
+ It overrides the universal tag *asn1Id*.
+
+ constructed : bool
+ True when the ASN.1 type is *constructed*.
+ False when it is *primitive*.
+ """
+
+ if asn1Id==None:
+ self._idOctet = None
+ return
+ asn1Id = self._convertTag(asn1Id)
+ self._implicit = implicit
+ if implicit:
+ # In a BER/DER identifier octet:
+ # * bits 4-0 contain the tag value
+            #   * bit 5 is set if the type is 'constructed'
+ # and unset if 'primitive'
+ # * bits 7-6 depend on the encoding class
+ #
+ # Class | Bit 7, Bit 6
+ # universal | 0 0
+ # application | 0 1
+ # context-spec | 1 0 (default for IMPLICIT)
+ # private | 1 1
+ #
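+            # Worked example: an IMPLICIT context-specific tag 3 gives
+            # 0x80 | 0x03 = 0x83; if the type is also constructed, bit 5 is
+            # set further below, yielding 0xA3.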
+ self._idOctet = 0x80 | self._convertTag(implicit)
+ else:
+ self._idOctet = asn1Id
+ if constructed:
+ self._idOctet |= 0x20
+ self.payload = payload
+
+ def _convertTag(self, tag):
+ """Check if *tag* is a real DER tag.
+        Convert it from a character to a number if necessary.
+ """
+ if not _isInt(tag):
+ if len(tag)==1:
+ tag = bord(tag[0])
+ # Ensure that tag is a low tag
+ if not (_isInt(tag) and 0 <= tag < 0x1F):
+ raise ValueError("Wrong DER tag")
+ return tag
+
+ def _lengthOctets(self):
+ """Build length octets according to the current object's payload.
+
+ Return a byte string that encodes the payload length (in
+ bytes) in a format suitable for DER length octets (L).
+ """
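+        # For example, a 3-byte payload encodes as the single octet 0x03
+        # (short form), while a 300-byte payload encodes as 0x82 0x01 0x2C
+        # (long form: 0x80 | number of length octets, then the length itself).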
+ payloadLen = len(self.payload)
+ if payloadLen>127:
+ encoding = long_to_bytes(payloadLen)
+ return bchr(len(encoding)+128) + encoding
+ return bchr(payloadLen)
+
+ def encode(self):
+ """Return this DER element, fully encoded as a binary byte string."""
+ # Concatenate identifier octets, length octets,
+ # and contents octets
+ return bchr(self._idOctet) + self._lengthOctets() + self.payload
+
+ def _decodeLen(self, s):
+ """Decode DER length octets from a file."""
+
+ length = bord(s.read_byte())
+ if length<=127:
+ return length
+ payloadLength = bytes_to_long(s.read(length & 0x7F))
+ # According to DER (but not BER) the long form is used
+ # only when the length doesn't fit into 7 bits.
+ if payloadLength<=127:
+ raise ValueError("Not a DER length tag (but still valid BER).")
+ return payloadLength
+
+ def decode(self, derEle):
+        """Decode a complete DER element, and re-initialize this
+ object with it.
+
+ :Parameters:
+ derEle : byte string
+ A complete DER element.
+
+ :Raise ValueError:
+ In case of parsing errors.
+ :Raise EOFError:
+ If the DER element is too short.
+ """
+
+ s = BytesIO_EOF(derEle)
+ self._decodeFromStream(s)
+ # There shouldn't be other bytes left
+ try:
+ b = s.read_byte()
+ raise ValueError("Unexpected extra data after the DER structure")
+ except EOFError:
+ pass
+
+ def _decodeFromStream(self, s):
+ """Decode a complete DER element from a file."""
+
+ try:
+ idOctet = bord(s.read_byte())
+ except EOFError:
+ raise _NoDerElementError
+ if self._idOctet != None:
+ if idOctet != self._idOctet:
+ raise ValueError("Unexpected DER tag")
+ else:
+ self._idOctet = idOctet
+ length = self._decodeLen(s)
+ self.payload = s.read(length)
+
+class DerInteger(DerObject):
+ """Class to model a DER INTEGER.
+
+ An example of encoding is:
+
+ >>> from cgcloud_Crypto.Util.asn1 import DerInteger
+ >>> from binascii import hexlify, unhexlify
+ >>> int_der = DerInteger(9)
+ >>> print hexlify(int_der.encode())
+
+ which will show ``020109``, the DER encoding of 9.
+
+ And for decoding:
+
+ >>> s = unhexlify(b'020109')
+ >>> try:
+ >>> int_der = DerInteger()
+ >>> int_der.decode(s)
+ >>> print int_der.value
+ >>> except (ValueError, EOFError):
+ >>> print "Not a valid DER INTEGER"
+
+ the output will be ``9``.
+ """
+
+ def __init__(self, value=0, implicit=None):
+ """Initialize the DER object as an INTEGER.
+
+ :Parameters:
+ value : integer
+ The value of the integer.
+
+ implicit : integer
+ The IMPLICIT tag to use for the encoded object.
+ It overrides the universal tag for INTEGER (2).
+ """
+
+ DerObject.__init__(self, 0x02, b(''), implicit, False)
+ self.value = value #: The integer value
+
+ def encode(self):
+ """Return the DER INTEGER, fully encoded as a
+ binary string."""
+
+ number = self.value
+ self.payload = b('')
+ while True:
+ self.payload = bchr(number&255) + self.payload
+ if 128 <= number <= 255:
+ self.payload = bchr(0x00) + self.payload
+ if -128 <= number <= 255:
+ break
+ number >>= 8
+ return DerObject.encode(self)
+
+ def decode(self, derEle):
+        """Decode a complete DER INTEGER, and re-initialize this
+ object with it.
+
+ :Parameters:
+ derEle : byte string
+ A complete INTEGER DER element.
+
+ :Raise ValueError:
+ In case of parsing errors.
+ :Raise EOFError:
+ If the DER element is too short.
+ """
+ DerObject.decode(self, derEle)
+
+ def _decodeFromStream(self, s):
+ """Decode a complete DER INTEGER from a file."""
+
+ # Fill up self.payload
+ DerObject._decodeFromStream(self, s)
+
+ # Derive self.value from self.payload
+ self.value = 0L
+ bits = 1
+ for i in self.payload:
+ self.value *= 256
+ self.value += bord(i)
+ bits <<= 8
+ if self.payload and bord(self.payload[0]) & 0x80:
+ self.value -= bits
+
+def newDerInteger(number):
+ """Create a DerInteger object, already initialized with an integer."""
+
+ der = DerInteger(number)
+ return der
+
+class DerSequence(DerObject):
+ """Class to model a DER SEQUENCE.
+
+ This object behaves like a dynamic Python sequence.
+
+ Sub-elements that are INTEGERs behave like Python integers.
+
+ Any other sub-element is a binary string encoded as a complete DER
+ sub-element (TLV).
+
+ An example of encoding is:
+
+ >>> from cgcloud_Crypto.Util.asn1 import DerSequence, DerInteger
+ >>> from binascii import hexlify, unhexlify
+ >>> obj_der = unhexlify('070102')
+ >>> seq_der = DerSequence([4])
+ >>> seq_der.append(9)
+ >>> seq_der.append(obj_der.encode())
+ >>> print hexlify(seq_der.encode())
+
+ which will show ``3009020104020109070102``, the DER encoding of the
+ sequence containing ``4``, ``9``, and the object with payload ``02``.
+
+ For decoding:
+
+ >>> s = unhexlify(b'3009020104020109070102')
+ >>> try:
+ >>> seq_der = DerSequence()
+ >>> seq_der.decode(s)
+ >>> print len(seq_der)
+ >>> print seq_der[0]
+ >>> print seq_der[:]
+ >>> except (ValueError, EOFError):
+ >>> print "Not a valid DER SEQUENCE"
+
+ the output will be::
+
+ 3
+ 4
+ [4L, 9L, b'\x07\x01\x02']
+
+ """
+
+ def __init__(self, startSeq=None, implicit=None):
+ """Initialize the DER object as a SEQUENCE.
+
+ :Parameters:
+ startSeq : Python sequence
+            A sequence whose elements are either integers or
+ other DER objects.
+
+ implicit : integer
+ The IMPLICIT tag to use for the encoded object.
+ It overrides the universal tag for SEQUENCE (16).
+ """
+
+ DerObject.__init__(self, 0x10, b(''), implicit, True)
+ if startSeq==None:
+ self._seq = []
+ else:
+ self._seq = startSeq
+
+ ## A few methods to make it behave like a python sequence
+
+ def __delitem__(self, n):
+ del self._seq[n]
+ def __getitem__(self, n):
+ return self._seq[n]
+ def __setitem__(self, key, value):
+ self._seq[key] = value
+ def __setslice__(self,i,j,sequence):
+ self._seq[i:j] = sequence
+ def __delslice__(self,i,j):
+ del self._seq[i:j]
+ def __getslice__(self, i, j):
+ return self._seq[max(0, i):max(0, j)]
+ def __len__(self):
+ return len(self._seq)
+ def __iadd__(self, item):
+ self._seq.append(item)
+ return self
+ def append(self, item):
+ self._seq.append(item)
+ return self
+
+ def hasInts(self, onlyNonNegative=True):
+ """Return the number of items in this sequence that are
+ integers.
+
+ :Parameters:
+ onlyNonNegative : boolean
+            If True, negative integers are not counted.
+ """
+ def _isInt2(x):
+ return _isInt(x, onlyNonNegative)
+ return len(filter(_isInt2, self._seq))
+
+ def hasOnlyInts(self, onlyNonNegative=True):
+ """Return True if all items in this sequence are integers
+ or non-negative integers.
+
+        This function returns False if the sequence is empty
+        or if at least one member is not an integer.
+
+ :Parameters:
+ onlyNonNegative : boolean
+ If True, the presence of negative integers
+ causes the method to return False."""
+ return self._seq and self.hasInts(onlyNonNegative)==len(self._seq)
+
+ def encode(self):
+ """Return this DER SEQUENCE, fully encoded as a
+ binary string.
+
+ :Raises ValueError:
+ If some elements in the sequence are neither integers
+ nor byte strings.
+ """
+ self.payload = b('')
+ for item in self._seq:
+ try:
+ self.payload += item
+ except TypeError:
+ try:
+ self.payload += DerInteger(item).encode()
+ except TypeError:
+ raise ValueError("Trying to DER encode an unknown object")
+ return DerObject.encode(self)
+
+ def decode(self, derEle):
+        """Decode a complete DER SEQUENCE, and re-initialize this
+ object with it.
+
+ :Parameters:
+ derEle : byte string
+ A complete SEQUENCE DER element.
+
+ :Raise ValueError:
+ In case of parsing errors.
+ :Raise EOFError:
+ If the DER element is too short.
+
+ DER INTEGERs are decoded into Python integers. Any other DER
+ element is not decoded. Its validity is not checked.
+ """
+ DerObject.decode(self, derEle)
+
+ def _decodeFromStream(self, s):
+ """Decode a complete DER SEQUENCE from a file."""
+
+ self._seq = []
+
+ # Fill up self.payload
+ DerObject._decodeFromStream(self, s)
+
+ # Add one item at a time to self.seq, by scanning self.payload
+ p = BytesIO_EOF(self.payload)
+ while True:
+ try:
+ p.setRecord(True)
+ der = DerObject()
+ der._decodeFromStream(p)
+
+ # Parse INTEGERs differently
+ if der._idOctet != 0x02:
+ self._seq.append(p._recording)
+ else:
+ derInt = DerInteger()
+ derInt.decode(p._recording)
+ self._seq.append(derInt.value)
+
+ except _NoDerElementError:
+ break
+ # end
+
+def newDerSequence(*der_objs):
+ """Create a DerSequence object, already initialized with all objects
+ passed as parameters."""
+
+ der = DerSequence()
+ for obj in der_objs:
+ if isinstance(obj, DerObject):
+ der += obj.encode()
+ else:
+ der += obj
+ return der
+
+class DerOctetString(DerObject):
+ """Class to model a DER OCTET STRING.
+
+ An example of encoding is:
+
+ >>> from cgcloud_Crypto.Util.asn1 import DerOctetString
+ >>> from binascii import hexlify, unhexlify
+ >>> os_der = DerOctetString(b'\\xaa')
+ >>> os_der.payload += b'\\xbb'
+ >>> print hexlify(os_der.encode())
+
+ which will show ``0402aabb``, the DER encoding for the byte string
+ ``b'\\xAA\\xBB'``.
+
+ For decoding:
+
+ >>> s = unhexlify(b'0402aabb')
+ >>> try:
+ >>> os_der = DerOctetString()
+ >>> os_der.decode(s)
+ >>> print hexlify(os_der.payload)
+ >>> except (ValueError, EOFError):
+ >>> print "Not a valid DER OCTET STRING"
+
+ the output will be ``aabb``.
+ """
+
+ def __init__(self, value=b(''), implicit=None):
+ """Initialize the DER object as an OCTET STRING.
+
+ :Parameters:
+ value : byte string
+ The initial payload of the object.
+ If not specified, the payload is empty.
+
+ implicit : integer
+ The IMPLICIT tag to use for the encoded object.
+ It overrides the universal tag for OCTET STRING (4).
+ """
+ DerObject.__init__(self, 0x04, value, implicit, False)
+
+def newDerOctetString(binstring):
+ """Create a DerOctetString object, already initialized with the binary
+ string."""
+
+ if isinstance(binstring, DerObject):
+ der = DerOctetString(binstring.encode())
+ else:
+ der = DerOctetString(binstring)
+ return der
+
+class DerNull(DerObject):
+ """Class to model a DER NULL element."""
+
+ def __init__(self):
+ """Initialize the DER object as a NULL."""
+
+        DerObject.__init__(self, 0x05, b(''), None, False)
+
+class DerObjectId(DerObject):
+ """Class to model a DER OBJECT ID.
+
+ An example of encoding is:
+
+ >>> from cgcloud_Crypto.Util.asn1 import DerObjectId
+ >>> from binascii import hexlify, unhexlify
+ >>> oid_der = DerObjectId("1.2")
+ >>> oid_der.value += ".840.113549.1.1.1"
+ >>> print hexlify(oid_der.encode())
+
+ which will show ``06092a864886f70d010101``, the DER encoding for the
+ RSA Object Identifier ``1.2.840.113549.1.1.1``.
+
+ For decoding:
+
+ >>> s = unhexlify(b'06092a864886f70d010101')
+ >>> try:
+ >>> oid_der = DerObjectId()
+ >>> oid_der.decode(s)
+ >>> print oid_der.value
+ >>> except (ValueError, EOFError):
+ >>> print "Not a valid DER OBJECT ID"
+
+ the output will be ``1.2.840.113549.1.1.1``.
+ """
+
+ def __init__(self, value='', implicit=None):
+ """Initialize the DER object as an OBJECT ID.
+
+ :Parameters:
+ value : string
+ The initial Object Identifier (e.g. "1.2.0.0.6.2").
+ implicit : integer
+ The IMPLICIT tag to use for the encoded object.
+ It overrides the universal tag for OBJECT ID (6).
+ """
+ DerObject.__init__(self, 0x06, b(''), implicit, False)
+ self.value = value #: The Object ID, a dot separated list of integers
+
+ def encode(self):
+ """Return the DER OBJECT ID, fully encoded as a
+ binary string."""
+
+ comps = map(int,self.value.split("."))
+ if len(comps)<2:
+ raise ValueError("Not a valid Object Identifier string")
+ self.payload = bchr(40*comps[0]+comps[1])
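+        # The first two components share a single octet (40*X+Y, e.g. "1.2"
+        # encodes as 0x2A); each remaining component is base-128 encoded below,
+        # with the high bit set on all but its last octet (e.g. 840 -> 0x86 0x48).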
+ for v in comps[2:]:
+ enc = []
+ while v:
+ enc.insert(0, (v & 0x7F) | 0x80)
+ v >>= 7
+ enc[-1] &= 0x7F
+ self.payload += b('').join(map(bchr, enc))
+ return DerObject.encode(self)
+
+ def decode(self, derEle):
+        """Decode a complete DER OBJECT ID, and re-initialize this
+ object with it.
+
+ :Parameters:
+ derEle : byte string
+ A complete DER OBJECT ID.
+
+ :Raise ValueError:
+ In case of parsing errors.
+ :Raise EOFError:
+ If the DER element is too short.
+ """
+
+ DerObject.decode(self, derEle)
+
+ def _decodeFromStream(self, s):
+ """Decode a complete DER OBJECT ID from a file."""
+
+ # Fill up self.payload
+ DerObject._decodeFromStream(self, s)
+
+ # Derive self.value from self.payload
+ p = BytesIO_EOF(self.payload)
+ comps = list(map(str, divmod(bord(p.read_byte()),40)))
+ v = 0
+ try:
+ while True:
+ c = p.read_byte()
+ v = v*128 + (bord(c) & 0x7F)
+ if not (bord(c) & 0x80):
+ comps.append(str(v))
+ v = 0
+ except EOFError:
+ pass
+ self.value = '.'.join(comps)
+
+def newDerObjectId(dottedstring):
+ """Create a DerObjectId object, already initialized with the given Object
+ Identifier (a dotted string)."""
+
+ der = DerObjectId(dottedstring)
+ return der
+
+class DerBitString(DerObject):
+ """Class to model a DER BIT STRING.
+
+ An example of encoding is:
+
+ >>> from cgcloud_Crypto.Util.asn1 import DerBitString
+ >>> from binascii import hexlify, unhexlify
+ >>> bs_der = DerBitString(b'\\xaa')
+ >>> bs_der.value += b'\\xbb'
+ >>> print hexlify(bs_der.encode())
+
+ which will show ``040300aabb``, the DER encoding for the bit string
+ ``b'\\xAA\\xBB'``.
+
+ For decoding:
+
+ >>> s = unhexlify(b'040300aabb')
+ >>> try:
+ >>> bs_der = DerBitString()
+ >>> bs_der.decode(s)
+ >>> print hexlify(bs_der.value)
+ >>> except (ValueError, EOFError):
+    >>>     print "Not a valid DER BIT STRING"
+
+ the output will be ``aabb``.
+ """
+
+ def __init__(self, value=b(''), implicit=None):
+ """Initialize the DER object as a BIT STRING.
+
+ :Parameters:
+ value : byte string
+ The initial, packed bit string.
+ If not specified, the bit string is empty.
+ implicit : integer
+ The IMPLICIT tag to use for the encoded object.
+            It overrides the universal tag for BIT STRING (3).
+ """
+ DerObject.__init__(self, 0x03, b(''), implicit, False)
+ self.value = value #: The bitstring value (packed)
+
+ def encode(self):
+ """Return the DER BIT STRING, fully encoded as a
+ binary string."""
+
+ # Add padding count byte
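+        # (the first content octet of a DER BIT STRING is the number of unused
+        # bits in the last byte; it is always zero here because the value is
+        # treated as a whole number of bytes)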
+ self.payload = b('\x00') + self.value
+ return DerObject.encode(self)
+
+ def decode(self, derEle):
+        """Decode a complete DER BIT STRING, and re-initialize this
+ object with it.
+
+ :Parameters:
+ derEle : byte string
+ A complete DER BIT STRING.
+
+ :Raise ValueError:
+ In case of parsing errors.
+ :Raise EOFError:
+ If the DER element is too short.
+ """
+
+ DerObject.decode(self, derEle)
+
+ def _decodeFromStream(self, s):
+        """Decode a complete DER BIT STRING from a file."""
+
+ # Fill-up self.payload
+ DerObject._decodeFromStream(self, s)
+
+ if self.payload and bord(self.payload[0])!=0:
+ raise ValueError("Not a valid BIT STRING")
+
+ # Fill-up self.value
+ self.value = b('')
+ # Remove padding count byte
+ if self.payload:
+ self.value = self.payload[1:]
+
+def newDerBitString(binstring):
+    """Create a DerBitString object, already initialized with the binary
+ string."""
+
+ if isinstance(binstring, DerObject):
+ der = DerBitString(binstring.encode())
+ else:
+ der = DerBitString(binstring)
+ return der
+
+class DerSetOf(DerObject):
+ """Class to model a DER SET OF.
+
+ An example of encoding is:
+
+    >>> from cgcloud_Crypto.Util.asn1 import DerSetOf
+ >>> from binascii import hexlify, unhexlify
+ >>> so_der = DerSetOf([4,5])
+ >>> so_der.add(6)
+ >>> print hexlify(so_der.encode())
+
+ which will show ``3109020104020105020106``, the DER encoding
+ of a SET OF with items 4,5, and 6.
+
+ For decoding:
+
+ >>> s = unhexlify(b'3109020104020105020106')
+ >>> try:
+ >>> so_der = DerSetOf()
+ >>> so_der.decode(s)
+ >>> print [x for x in so_der]
+ >>> except (ValueError, EOFError):
+ >>> print "Not a valid DER SET OF"
+
+ the output will be ``[4L, 5L, 6L]``.
+ """
+
+ def __init__(self, startSet=None, implicit=None):
+ """Initialize the DER object as a SET OF.
+
+ :Parameters:
+ startSet : container
+ The initial set of integers or DER encoded objects.
+ implicit : integer
+ The IMPLICIT tag to use for the encoded object.
+ It overrides the universal tag for SET OF (17).
+ """
+ DerObject.__init__(self, 0x11, b(''), implicit, True)
+ self._seq = []
+ self._elemOctet = None
+ if startSet:
+ for e in startSet:
+ self.add(e)
+
+ def __getitem__(self, n):
+ return self._seq[n]
+
+ def __iter__(self):
+ return iter(self._seq)
+
+ def __len__(self):
+ return len(self._seq)
+
+ def add(self, elem):
+ """Add an element to the set.
+
+ :Parameters:
+ elem : byte string or integer
+            An element of the same type as the objects already in the set.
+ It can be an integer or a DER encoded object.
+ """
+ if _isInt(elem):
+ eo = 0x02
+ else:
+ eo = bord(elem[0])
+ if self._elemOctet != eo:
+ if self._elemOctet:
+ raise ValueError("New element does not belong to the set")
+ self._elemOctet = eo
+ if not elem in self._seq:
+ self._seq.append(elem)
+
+ def decode(self, derEle):
+        """Decode a complete SET OF DER element, and re-initialize this
+ object with it.
+
+ DER INTEGERs are decoded into Python integers. Any other DER
+ element is left undecoded; its validity is not checked.
+
+ :Parameters:
+ derEle : byte string
+            A complete SET OF DER element.
+
+ :Raise ValueError:
+ In case of parsing errors.
+ :Raise EOFError:
+ If the DER element is too short.
+ """
+
+ DerObject.decode(self, derEle)
+
+ def _decodeFromStream(self, s):
+ """Decode a complete DER SET OF from a file."""
+
+ self._seq = []
+
+ # Fill up self.payload
+ DerObject._decodeFromStream(self, s)
+
+ # Add one item at a time to self.seq, by scanning self.payload
+ p = BytesIO_EOF(self.payload)
+ setIdOctet = -1
+ while True:
+ try:
+ p.setRecord(True)
+ der = DerObject()
+ der._decodeFromStream(p)
+
+ # Verify that all members are of the same type
+ if setIdOctet < 0:
+ setIdOctet = der._idOctet
+ else:
+ if setIdOctet != der._idOctet:
+ raise ValueError("Not all elements are of the same DER type")
+
+ # Parse INTEGERs differently
+ if setIdOctet != 0x02:
+ self._seq.append(p._recording)
+ else:
+ derInt = DerInteger()
+ derInt.decode(p._recording)
+ self._seq.append(derInt.value)
+
+ except _NoDerElementError:
+ break
+ # end
+
+ def encode(self):
+ """Return this SET OF DER element, fully encoded as a
+ binary string.
+ """
+
+        # Elements of the set must be encoded in lexicographic order
+ ordered = []
+ for item in self._seq:
+ if _isInt(item):
+ bys = DerInteger(item).encode()
+ else:
+ bys = item
+ ordered.append(bys)
+ ordered.sort()
+ self.payload = b('').join(ordered)
+ return DerObject.encode(self)
+
+def newDerSetOf(*der_objs):
+    """Create a DerSetOf object, already initialized with all objects
+ passed as parameters."""
+
+ der = DerSetOf()
+ for obj in der_objs:
+ if isinstance(obj, DerObject):
+ der.add(obj.encode())
+ else:
+ der.add(obj)
+ return der
diff --git a/lib/src/cgcloud_Crypto/Util/number.py b/lib/src/cgcloud_Crypto/Util/number.py
new file mode 100644
index 0000000..77d56f5
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/Util/number.py
@@ -0,0 +1,156 @@
+#
+# number.py : Number-theoretic functions
+#
+# Part of the Python Cryptography Toolkit
+#
+# Written by Andrew M. Kuchling, Barry A. Warsaw, and others
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+#
+
+__revision__ = "$Id$"
+
+from warnings import warn as _warn
+import math
+import sys
+
+from cgcloud_Crypto.pct_warnings import GetRandomNumber_DeprecationWarning, PowmInsecureWarning
+from cgcloud_Crypto.Util.py3compat import *
+
+bignum = long
+try:
+ from cgcloud_Crypto.PublicKey import _fastmath
+except ImportError:
+ # For production, we are going to let import issues due to gmp/mpir shared
+ # libraries not loading slide silently and use slowmath. If you'd rather
+ # see an exception raised if _fastmath exists but cannot be imported,
+ # uncomment the below
+ #
+ # from distutils.sysconfig import get_config_var
+ # import inspect, os
+ # _fm_path = os.path.normpath(os.path.dirname(os.path.abspath(
+ # inspect.getfile(inspect.currentframe())))
+ # +"/../../PublicKey/_fastmath"+get_config_var("SO"))
+ # if os.path.exists(_fm_path):
+ # raise ImportError("While the _fastmath module exists, importing "+
+ # "it failed. This may point to the gmp or mpir shared library "+
+ # "not being in the path. _fastmath was found at "+_fm_path)
+ _fastmath = None
+
+# You need libgmp v5 or later to get mpz_powm_sec. Warn if it's not available.
+if _fastmath is not None and not _fastmath.HAVE_DECL_MPZ_POWM_SEC:
+ _warn("Not using mpz_powm_sec. You should rebuild using libgmp >= 5 to avoid timing attack vulnerability.", PowmInsecureWarning)
+
+# New functions
+# from _number_new import *
+
+# Commented out and replaced with faster versions below
+## def long2str(n):
+## s=''
+## while n>0:
+## s=chr(n & 255)+s
+## n=n>>8
+## return s
+
+## import types
+## def str2long(s):
+## if type(s)!=types.StringType: return s # Integers will be left alone
+## return reduce(lambda x,y : x*256+ord(y), s, 0L)
+
+def inverse(u, v):
+ """inverse(u:long, v:long):long
+ Return the inverse of u mod v.
+ """
+ u3, v3 = long(u), long(v)
+ u1, v1 = 1L, 0L
+ while v3 > 0:
+ q=divmod(u3, v3)[0]
+ u1, v1 = v1, u1 - v1*q
+ u3, v3 = v3, u3 - v3*q
+ while u1<0:
+ u1 = u1 + v
+ return u1
+
+# Improved conversion functions contributed by Barry Warsaw, after
+# careful benchmarking
+
+import struct
+
+def long_to_bytes(n, blocksize=0):
+ """long_to_bytes(n:long, blocksize:int) : string
+ Convert a long integer to a byte string.
+
+ If optional blocksize is given and greater than zero, pad the front of the
+ byte string with binary zeros so that the length is a multiple of
+ blocksize.
+ """
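+    # For example, long_to_bytes(65537) == b('\x01\x00\x01'); with blocksize=4
+    # the result is left-padded to b('\x00\x01\x00\x01').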
+ # after much testing, this algorithm was deemed to be the fastest
+ s = b('')
+ n = long(n)
+ pack = struct.pack
+ while n > 0:
+ s = pack('>I', n & 0xffffffffL) + s
+ n = n >> 32
+ # strip off leading zeros
+ for i in range(len(s)):
+ if s[i] != b('\000')[0]:
+ break
+ else:
+ # only happens when n == 0
+ s = b('\000')
+ i = 0
+ s = s[i:]
+ # add back some pad bytes. this could be done more efficiently w.r.t. the
+ # de-padding being done above, but sigh...
+ if blocksize > 0 and len(s) % blocksize:
+ s = (blocksize - len(s) % blocksize) * b('\000') + s
+ return s
+
+def bytes_to_long(s):
+ """bytes_to_long(string) : long
+ Convert a byte string to a long integer.
+
+ This is (essentially) the inverse of long_to_bytes().
+ """
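+    # For example, bytes_to_long(b('\x01\x00\x01')) == 65537.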
+ acc = 0L
+ unpack = struct.unpack
+ length = len(s)
+ if length % 4:
+ extra = (4 - length % 4)
+ s = b('\000') * extra + s
+ length = length + extra
+ for i in range(0, length, 4):
+ acc = (acc << 32) + unpack('>I', s[i:i+4])[0]
+ return acc
+
+# For backwards compatibility...
+import warnings
+def long2str(n, blocksize=0):
+ warnings.warn("long2str() has been replaced by long_to_bytes()")
+ return long_to_bytes(n, blocksize)
+def str2long(s):
+ warnings.warn("str2long() has been replaced by bytes_to_long()")
+ return bytes_to_long(s)
+
+def _import_Random():
+ # This is called in a function instead of at the module level in order to
+ # avoid problems with recursive imports
+ global Random, StrongRandom
+ from cgcloud_Crypto import Random
+ from cgcloud_Crypto.Random.random import StrongRandom
diff --git a/lib/src/cgcloud_Crypto/Util/py3compat.py b/lib/src/cgcloud_Crypto/Util/py3compat.py
new file mode 100644
index 0000000..7aa2bee
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/Util/py3compat.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+#
+# Util/py3compat.py : Compatibility code for handling Py3k / Python 2.x
+#
+# Written in 2010 by Thorsten Behrens
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+
+"""Compatibility code for handling string/bytes changes from Python 2.x to Py3k
+
+In Python 2.x, strings (of type ''str'') contain binary data, including encoded
+Unicode text (e.g. UTF-8). The separate type ''unicode'' holds Unicode text.
+Unicode literals are specified via the u'...' prefix. Indexing or slicing
+either type always produces a string of the same type as the original.
+Data read from a file is always of ''str'' type.
+
+In Python 3.x, strings (type ''str'') may only contain Unicode text. The u'...'
+prefix and the ''unicode'' type are now redundant. A new type (called
+''bytes'') has to be used for binary data (including any particular
+''encoding'' of a string). The b'...' prefix allows one to specify a binary
+literal. Indexing or slicing a string produces another string. Slicing a byte
+string produces another byte string, but the indexing operation produces an
+integer. Data read from a file is of ''str'' type if the file was opened in
+text mode, or of ''bytes'' type otherwise.
+
+Since PyCrypto aims at supporting both Python 2.x and 3.x, the following helper
+functions are used to keep the rest of the library as independent as possible
+from the actual Python version.
+
+In general, the code should always deal with binary strings, and use integers
+instead of 1-byte character strings.
+
+b(s)
+ Take a text string literal (with no prefix or with u'...' prefix) and
+ make a byte string.
+bchr(c)
+ Take an integer and make a 1-character byte string.
+bord(c)
+ Take the result of indexing on a byte string and make an integer.
+tobytes(s)
+    Take a text string, a byte string, or a sequence of characters taken from
+ a byte string and make a byte string.
+"""
+
+__revision__ = "$Id$"
+
+import sys
+
+if sys.version_info[0] == 2:
+ from types import UnicodeType as _UnicodeType # In Python 2.1, 'unicode' is a function, not a type.
+
+ def b(s):
+ return s
+ def bchr(s):
+ return chr(s)
+ def bstr(s):
+ return str(s)
+ def bord(s):
+ return ord(s)
+ def tobytes(s):
+ if isinstance(s, _UnicodeType):
+ return s.encode("latin-1")
+ else:
+ return ''.join(s)
+ def tostr(bs):
+ return unicode(bs, 'latin-1')
+    # In Python 2.x, StringIO is a stand-alone module
+ from StringIO import StringIO as BytesIO
+else:
+ def b(s):
+ return s.encode("latin-1") # utf-8 would cause some side-effects we don't want
+ def bchr(s):
+ return bytes([s])
+ def bstr(s):
+ if isinstance(s,str):
+ return bytes(s,"latin-1")
+ else:
+ return bytes(s)
+ def bord(s):
+ return s
+ def tobytes(s):
+ if isinstance(s,bytes):
+ return s
+ else:
+ if isinstance(s,str):
+ return s.encode("latin-1")
+ else:
+ return bytes(s)
+ def tostr(bs):
+ return bs.decode("latin-1")
+    # In Python 3.x, BytesIO is provided by the io module
+ from io import BytesIO
+
+# vim:set ts=4 sw=4 sts=4 expandtab:
diff --git a/lib/src/cgcloud_Crypto/__init__.py b/lib/src/cgcloud_Crypto/__init__.py
new file mode 100644
index 0000000..2834731
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/__init__.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+
+"""Python Cryptography Toolkit
+
+A collection of cryptographic modules implementing various algorithms
+and protocols.
+
+Subpackages:
+
+Crypto.Cipher
+ Secret-key (AES, DES, ARC4) and public-key encryption (RSA PKCS#1) algorithms
+Crypto.Hash
+ Hashing algorithms (MD5, SHA, HMAC)
+Crypto.Protocol
+ Cryptographic protocols (Chaffing, all-or-nothing transform, key derivation
+ functions). This package does not contain any network protocols.
+Crypto.PublicKey
+ Public-key encryption and signature algorithms (RSA, DSA)
+Crypto.Signature
+ Public-key signature algorithms (RSA PKCS#1)
+Crypto.Util
+ Various useful modules and functions (long-to-string conversion, random number
+ generation, number theoretic functions)
+"""
+
+__all__ = ['Cipher', 'Hash', 'Protocol', 'PublicKey', 'Util', 'Signature', 'IO']
+
+__version__ = '2.7a1' # See also below and setup.py
+__revision__ = "$Id$"
+
+# New software should look at this instead of at __version__ above.
+version_info = (2, 7, 0, 'alpha', 1) # See also above and setup.py
+
diff --git a/lib/src/cgcloud_Crypto/pct_warnings.py b/lib/src/cgcloud_Crypto/pct_warnings.py
new file mode 100644
index 0000000..d6adc5b
--- /dev/null
+++ b/lib/src/cgcloud_Crypto/pct_warnings.py
@@ -0,0 +1,63 @@
+# -*- coding: ascii -*-
+#
+# pct_warnings.py : PyCrypto warnings file
+#
+# Written in 2008 by Dwayne C. Litzenberger <dlitz at dlitz.net>
+#
+# ===================================================================
+# The contents of this file are dedicated to the public domain. To
+# the extent that dedication to the public domain is not available,
+# everyone is granted a worldwide, perpetual, royalty-free,
+# non-exclusive license to exercise all rights associated with the
+# contents of this file for any purpose whatsoever.
+# No rights are reserved.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ===================================================================
+
+#
+# Base classes. All our warnings inherit from one of these in order to allow
+# the user to specifically filter them.
+#
+
+class CryptoWarning(Warning):
+ """Base class for PyCrypto warnings"""
+
+class CryptoDeprecationWarning(DeprecationWarning, CryptoWarning):
+ """Base PyCrypto DeprecationWarning class"""
+
+class CryptoRuntimeWarning(RuntimeWarning, CryptoWarning):
+ """Base PyCrypto RuntimeWarning class"""
+
+#
+# Warnings that we might actually use
+#
+
+class RandomPool_DeprecationWarning(CryptoDeprecationWarning):
+ """Issued when Crypto.Util.randpool.RandomPool is instantiated."""
+
+class ClockRewindWarning(CryptoRuntimeWarning):
+ """Warning for when the system clock moves backwards."""
+
+class GetRandomNumber_DeprecationWarning(CryptoDeprecationWarning):
+ """Issued when Crypto.Util.number.getRandomNumber is invoked."""
+
+class DisableShortcut_DeprecationWarning(CryptoDeprecationWarning):
+ """Issued when Counter.new(disable_shortcut=...) is invoked."""
+
+class PowmInsecureWarning(CryptoRuntimeWarning):
+ """Warning for when _fastmath is built without mpz_powm_sec"""
+
+# By default, we want this warning to be shown every time we compensate for
+# clock rewinding.
+import warnings as _warnings
+_warnings.filterwarnings('always', category=ClockRewindWarning, append=1)
+
+# vim:set ts=4 sw=4 sts=4 expandtab:
diff --git a/mesos-tools/.gitignore b/mesos-tools/.gitignore
new file mode 100644
index 0000000..0ff20ef
--- /dev/null
+++ b/mesos-tools/.gitignore
@@ -0,0 +1,6 @@
+/build
+/dist
+*.egg-info
+*.pyc
+/MANIFEST.in
+/version.py
diff --git a/mesos-tools/setup.cfg b/mesos-tools/setup.cfg
new file mode 100644
index 0000000..082e511
--- /dev/null
+++ b/mesos-tools/setup.cfg
@@ -0,0 +1,5 @@
+[pytest]
+# Look for any Python file; the default of test_*.py wouldn't work for us
+python_files=*.py
+# Also run doctests
+addopts = --doctest-modules
diff --git a/mesos-tools/setup.py b/mesos-tools/setup.py
new file mode 100644
index 0000000..053da2f
--- /dev/null
+++ b/mesos-tools/setup.py
@@ -0,0 +1,22 @@
+from __future__ import absolute_import
+
+from setuptools import setup, find_packages
+
+from version import cgcloud_version, bd2k_python_lib_dep, boto_dep
+
+setup(
+ name="cgcloud-mesos-tools",
+ version=cgcloud_version,
+
+ author='Christopher Ketchum',
+ author_email='cketchum at ucsc.edu',
+ url='https://github.com/BD2KGenomics/cgcloud',
+ description='Setup and manage an Apache Mesos cluster in EC2',
+
+ package_dir={ '': 'src' },
+ packages=find_packages( 'src' ),
+ namespace_packages=[ 'cgcloud' ],
+ install_requires=[
+ bd2k_python_lib_dep,
+ 'cgcloud-lib==' + cgcloud_version,
+ boto_dep ] )
diff --git a/mesos-tools/src/cgcloud/__init__.py b/mesos-tools/src/cgcloud/__init__.py
new file mode 100644
index 0000000..ce648cf
--- /dev/null
+++ b/mesos-tools/src/cgcloud/__init__.py
@@ -0,0 +1 @@
+__import__( 'pkg_resources' ).declare_namespace( __name__ )
diff --git a/mesos-tools/src/cgcloud/mesos_tools/__init__.py b/mesos-tools/src/cgcloud/mesos_tools/__init__.py
new file mode 100644
index 0000000..0212df2
--- /dev/null
+++ b/mesos-tools/src/cgcloud/mesos_tools/__init__.py
@@ -0,0 +1,396 @@
+import errno
+import fcntl
+import itertools
+import logging
+import os
+import re
+import socket
+import stat
+import time
+from collections import OrderedDict
+from grp import getgrnam
+from pwd import getpwnam
+from subprocess import check_call, check_output, CalledProcessError
+from urllib2 import urlopen
+
+import boto.ec2
+from bd2k.util import memoize, less_strict_bool
+from bd2k.util.files import mkdir_p
+from boto.ec2.instance import Instance
+
+from cgcloud.lib.ec2 import EC2VolumeHelper
+from cgcloud.lib.util import volume_label_hash
+
+initctl = '/sbin/initctl'
+
+sudo = '/usr/bin/sudo'
+
+log = logging.getLogger( __name__ )
+
+
+class MesosTools( object ):
+ """
+ Tools for master discovery and managing the slaves file for Mesos. All of this happens at
+ boot time when a node (master or slave) starts up as part of a cluster.
+
+ Master discovery works as follows: All instances in a Mesos cluster are tagged with the
+ instance ID of the master. Each instance will look up the private IP of 1) the master
+ instance using the EC2 API (via boto) and 2) itself using the instance metadata endpoint. An
+ entry for "mesos-master" will be added to /etc/hosts. All configuration files use these names
+ instead of hard-coding the IPs. This is all that's needed to boot a working cluster.
+
+    Optionally, a persistent EBS volume is attached, formatted (if needed) and mounted.
+ """
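+    # Illustrative example (hypothetical IDs and addresses): if the master is
+    # instance i-0123456789 with private IP 10.0.0.5, every node in the cluster
+    # carries a 'leader_instance_id' tag naming that instance and ends up with
+    # an /etc/hosts entry of the form "10.0.0.5 mesos-master".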
+
+ def __init__( self, user, shared_dir, ephemeral_dir, persistent_dir, lazy_dirs ):
+ """
+ :param user: the user the services run as
+ """
+ super( MesosTools, self ).__init__( )
+ self.user = user
+ self.shared_dir = shared_dir
+ self.ephemeral_dir = ephemeral_dir
+ self.persistent_dir = persistent_dir
+ self.uid = getpwnam( self.user ).pw_uid
+ self.gid = getgrnam( self.user ).gr_gid
+ self.lazy_dirs = lazy_dirs
+ self._patch_boto_config( )
+
+ def _patch_boto_config( self ):
+ from boto import config
+ def inject_default( name, default ):
+ section = 'Boto'
+ value = config.get( section, name )
+
+ if value != default:
+ if not config.has_section( section ):
+ config.add_section( section )
+ config.set( section, name, default )
+
+ # Override the 5xx retry limit default of 6
+ inject_default( 'num_retries', '12' )
+
+ def start( self ):
+ """
+ Invoked at boot time or when the mesosbox service is started.
+ """
+ while not os.path.exists( '/tmp/cloud-init.done' ):
+ log.info( "Waiting for cloud-init to finish ..." )
+ time.sleep( 1 )
+ log.info( "Starting mesosbox" )
+ self.__setup_etc_hosts( )
+ self.__mount_ebs_volume( )
+ self.__create_lazy_dirs( )
+
+ if self.master_ip == self.node_ip:
+ node_type = 'master'
+ self.__publish_host_key( )
+ else:
+ node_type = 'slave'
+ self.__get_master_host_key( )
+ self.__wait_for_master_ssh( )
+ if self.shared_dir:
+ self._copy_dir_from_master( self.shared_dir )
+ self.__prepare_slave_args( )
+
+ log.info( "Starting %s services" % node_type )
+ check_call( [ initctl, 'emit', 'mesosbox-start-%s' % node_type ] )
+
+ def stop( self ):
+ """
+ Invoked at shutdown time or when the mesosbox service is stopped.
+ """
+ log.info( "Stopping mesosbox" )
+ self.__patch_etc_hosts( { 'mesos-master': None } )
+
+ @classmethod
+ @memoize
+ def instance_data( cls, path ):
+ return urlopen( 'http://169.254.169.254/latest/' + path ).read( )
+
+ @classmethod
+ @memoize
+ def meta_data( cls, path ):
+ return cls.instance_data( 'meta-data/' + path )
+
+ @classmethod
+ @memoize
+ def user_data( cls ):
+ user_data = cls.instance_data( 'user-data' )
+ log.info( "User data is '%s'", user_data )
+ return user_data
+
+ @property
+ @memoize
+ def node_ip( self ):
+ ip = self.meta_data( 'local-ipv4' )
+ log.info( "Local IP is '%s'", ip )
+ return ip
+
+ @property
+ @memoize
+ def instance_id( self ):
+ instance_id = self.meta_data( 'instance-id' )
+ log.info( "Instance ID is '%s'", instance_id )
+ return instance_id
+
+ @property
+ @memoize
+ def availability_zone( self ):
+ zone = self.meta_data( 'placement/availability-zone' )
+ log.info( "Availability zone is '%s'", zone )
+ return zone
+
+ @property
+ @memoize
+ def region( self ):
+ m = re.match( r'^([a-z]{2}-[a-z]+-[1-9][0-9]*)([a-z])$', self.availability_zone )
+ assert m
+ region = m.group( 1 )
+ log.info( "Region is '%s'", region )
+ return region
+
+ @property
+ @memoize
+ def ec2( self ):
+ return boto.ec2.connect_to_region( self.region )
+
+ @property
+ @memoize
+ def master_id( self ):
+ master_id = self.instance_tag( 'leader_instance_id' )
+ if not master_id:
+ raise RuntimeError( "Instance not tagged with master's instance ID" )
+ log.info( "Master's instance ID is '%s'", master_id )
+ return master_id
+
+ @property
+ @memoize
+ def master_ip( self ):
+ if self.master_id == self.instance_id:
+ master_ip = self.node_ip
+ log.info( "I am the master" )
+ else:
+ log.info( "I am a slave" )
+ master_ip = self.master_instance.private_ip_address
+ log.info( "Master IP is '%s'", master_ip )
+ return master_ip
+
+ @property
+ @memoize
+ def is_spot_instance( self ):
+ result = bool( self.this_instance.spot_instance_request_id )
+ log.info( "I am %s spot instance", "a" if result else "not a" )
+ return result
+
+ @memoize
+ def instance( self, instance_id ):
+ """:rtype: Instance"""
+ instances = self.ec2.get_only_instances( instance_ids=[ instance_id ] )
+ assert len( instances ) == 1
+ instance = instances[ 0 ]
+ return instance
+
+ @property
+ @memoize
+ def this_instance( self ):
+ """:rtype: Instance"""
+ instance = self.instance( self.instance_id )
+ log.info( "I am running on %r", instance.__dict__ )
+ return instance
+
+ @property
+ @memoize
+ def master_instance( self ):
+ """:rtype: Instance"""
+ return self.instance( self.master_id )
+
+ @memoize
+ def instance_tag( self, key ):
+ """:rtype: str|None"""
+ return self.this_instance.tags.get( key )
+
+ def __mount_ebs_volume( self ):
+ """
+ Attach, format (if necessary) and mount the EBS volume with the same cluster ordinal as
+ this node.
+ """
+ ebs_volume_size = self.instance_tag( 'ebs_volume_size' ) or '0'
+ ebs_volume_size = int( ebs_volume_size )
+ if ebs_volume_size:
+ instance_name = self.instance_tag( 'Name' )
+ cluster_ordinal = int( self.instance_tag( 'cluster_ordinal' ) )
+ volume_name = '%s__%d' % (instance_name, cluster_ordinal)
+ volume = EC2VolumeHelper( ec2=self.ec2,
+ availability_zone=self.availability_zone,
+ name=volume_name,
+ size=ebs_volume_size,
+ volume_type="gp2" )
+ # TODO: handle case where volume is already attached
+ device_ext = '/dev/sdf'
+ device = '/dev/xvdf'
+ volume.attach( self.instance_id, device_ext )
+
+            # Wait for the inode to appear and make sure it's a block device
+ while True:
+ try:
+ assert stat.S_ISBLK( os.stat( device ).st_mode )
+ break
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ time.sleep( 1 )
+ else:
+ raise
+
+ # Only format empty volumes
+ volume_label = volume_label_hash( volume_name )
+ if check_output( [ 'file', '-sL', device ] ).strip( ) == device + ': data':
+ check_call( [ 'mkfs', '-t', 'ext4', device ] )
+ check_call( [ 'e2label', device, volume_label ] )
+ else:
+ # If the volume is not empty, verify the file system label
+ actual_label = check_output( [ 'e2label', device ] ).strip( )
+ if actual_label != volume_label:
+ raise AssertionError(
+ "Expected volume label '%s' (derived from '%s') but got '%s'" %
+ (volume_label, volume_name, actual_label) )
+ current_mount_point = self.__mount_point( device )
+ if current_mount_point is None:
+ mkdir_p( self.persistent_dir )
+ check_call( [ 'mount', device, self.persistent_dir ] )
+ elif current_mount_point == self.persistent_dir:
+ pass
+ else:
+ raise RuntimeError(
+ "Can't mount device %s on '%s' since it is already mounted on '%s'" % (
+ device, self.persistent_dir, current_mount_point) )
+ else:
+ # No persistent volume is attached and the root volume is off limits, so we will need
+ # to place persistent data on the ephemeral volume.
+ self.persistent_dir = self.ephemeral_dir
+
+ def __get_master_host_key( self ):
+ log.info( "Getting master's host key" )
+ master_host_key = self.master_instance.tags.get( 'ssh_host_key' )
+ if master_host_key:
+ self.__add_host_keys( [ 'mesos-master:' + master_host_key ] )
+ else:
+ log.warn( "Could not get master's host key" )
+
+ def __add_host_keys( self, host_keys, globally=None ):
+ if globally is None:
+ globally = os.geteuid( ) == 0
+ if globally:
+ known_hosts_path = '/etc/ssh/ssh_known_hosts'
+ else:
+ known_hosts_path = os.path.expanduser( '~/.ssh/known_hosts' )
+ with open( known_hosts_path, 'a+' ) as f:
+ fcntl.flock( f, fcntl.LOCK_EX )
+ keys = set( _.strip( ) for _ in f.readlines( ) )
+ keys.update( ' '.join( _.split( ':' ) ) for _ in host_keys )
+ if '' in keys: keys.remove( '' )
+ keys = list( keys )
+ keys.sort( )
+ keys.append( '' )
+ f.seek( 0 )
+ f.truncate( 0 )
+ f.write( '\n'.join( keys ) )
+
+ def __wait_for_master_ssh( self ):
+ """
+        Wait until the master is accessible via SSH.
+ """
+ for _ in itertools.count( ):
+ s = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
+ try:
+ s.settimeout( 5 )
+ s.connect( ('mesos-master', 22) )
+ return
+ except socket.error:
+ pass
+ finally:
+ s.close( )
+
+ def _copy_dir_from_master( self, path ):
+ log.info( "Copying %s from master" % path )
+ if not path.endswith( '/' ):
+ path += '/'
+ for tries in range( 5 ):
+ try:
+ check_call( [ sudo, '-u', self.user,
+ 'rsync', '-av', 'mesos-master:' + path, path ] )
+ except CalledProcessError as e:
+ log.warn( "rsync returned %i, retrying in 5s", e.returncode )
+ time.sleep( 5 )
+ else:
+ return
+        raise RuntimeError( "Failed to copy %s from master" % path )
+
+ def __get_host_key( self ):
+ with open( '/etc/ssh/ssh_host_ecdsa_key.pub' ) as f:
+ return ':'.join( f.read( ).split( )[ :2 ] )
+
+ def __publish_host_key( self ):
+ master_host_key = self.__get_host_key( )
+ self.ec2.create_tags( [ self.master_id ], dict( ssh_host_key=master_host_key ) )
+
+ def __create_lazy_dirs( self ):
+ log.info( "Bind-mounting directory structure" )
+ for (parent, name, persistent) in self.lazy_dirs:
+ assert parent[ 0 ] == os.path.sep
+ logical_path = os.path.join( parent, name )
+ if persistent is None:
+ tag = 'persist' + logical_path.replace( os.path.sep, '_' )
+ persistent = less_strict_bool( self.instance_tag( tag ) )
+ location = self.persistent_dir if persistent else self.ephemeral_dir
+ physical_path = os.path.join( location, parent[ 1: ], name )
+ mkdir_p( physical_path )
+ os.chown( physical_path, self.uid, self.gid )
+ check_call( [ 'mount', '--bind', physical_path, logical_path ] )
+
+ def __setup_etc_hosts( self ):
+ hosts = self.instance_tag( 'etc_hosts_entries' ) or ""
+ hosts = parse_etc_hosts_entries( hosts )
+ hosts[ 'mesos-master' ] = self.master_ip
+ self.__patch_etc_hosts( hosts )
+
+ def __patch_etc_hosts( self, hosts ):
+        log.info( "Patching /etc/hosts" )
+ # FIXME: The handling of /etc/hosts isn't atomic
+ with open( '/etc/hosts', 'r+' ) as etc_hosts:
+ lines = [ line
+ for line in etc_hosts.readlines( )
+ if not any( host in line for host in hosts.iterkeys( ) ) ]
+ for host, ip in hosts.iteritems( ):
+ if ip: lines.append( "%s %s\n" % (ip, host) )
+ etc_hosts.seek( 0 )
+ etc_hosts.truncate( 0 )
+ etc_hosts.writelines( lines )
+
+ def __mount_point( self, device ):
+ with open( '/proc/mounts' ) as f:
+ for line in f:
+ line = line.split( )
+ if line[ 0 ] == device:
+ return line[ 1 ]
+ return None
+
+ def __prepare_slave_args( self ):
+ attributes = dict( preemptable=self.is_spot_instance )
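+        # For a spot instance, the formatting below produces a file containing
+        # "--attributes=preemptable:True"; the attribute value is the repr()
+        # of the boolean.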
+ with open( '/var/lib/mesos/slave_args', 'w' ) as f:
+ if attributes:
+ attributes = ';'.join( '%s:%r' % i for i in attributes.items( ) )
+ f.write( "--attributes=%s" % attributes )
+
+def parse_etc_hosts_entries( hosts ):
+ """
+ >>> parse_etc_hosts_entries("").items()
+ []
+ >>> parse_etc_hosts_entries("foo:1.2.3.4").items()
+ [('foo', '1.2.3.4')]
+ >>> parse_etc_hosts_entries(" foo : 1.2.3.4 , bar : 2.3.4.5 ").items()
+ [('foo', '1.2.3.4'), ('bar', '2.3.4.5')]
+ """
+    return OrderedDict( (name.strip( ), ip.strip( ))
+                        for name, ip in (entry.split( ':', 1 )
+                                         for entry in hosts.split( ',' ) if entry) )
diff --git a/mesos/.gitignore b/mesos/.gitignore
new file mode 100644
index 0000000..0ff20ef
--- /dev/null
+++ b/mesos/.gitignore
@@ -0,0 +1,6 @@
+/build
+/dist
+*.egg-info
+*.pyc
+/MANIFEST.in
+/version.py
diff --git a/mesos/LICENSE b/mesos/LICENSE
new file mode 100644
index 0000000..8f71f43
--- /dev/null
+++ b/mesos/LICENSE
@@ -0,0 +1,202 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
diff --git a/mesos/README.rst b/mesos/README.rst
new file mode 100644
index 0000000..5b90e13
--- /dev/null
+++ b/mesos/README.rst
@@ -0,0 +1,74 @@
+The CGCloud plugin for Mesos lets you set up a fully configured Apache Mesos
+cluster in EC2 in just minutes, regardless of the number of nodes.
+
+
+Prerequisites
+=============
+
+The ``cgcloud-mesos`` package requires that the ``cgcloud-core`` package and
+its prerequisites_ are present.
+
+.. _prerequisites: ../core#prerequisites
+
+
+Installation
+============
+
+Read the entire section before pasting any commands and ensure that all
+prerequisites are installed. It is recommended to install this plugin into the
+virtualenv you created for CGCloud::
+
+ source ~/cgcloud/bin/activate
+ pip install cgcloud-mesos
+
+If you get ``DistributionNotFound: No distributions matching the version for
+cgcloud-mesos``, try running ``pip install --pre cgcloud-mesos``.
+
+Be sure to configure_ ``cgcloud-core`` before proceeding.
+
+.. _configure: ../core/README.rst#configuration
+
+Configuration
+=============
+
+Modify your ``.profile`` or ``.bash_profile`` by adding the following line::
+
+ export CGCLOUD_PLUGINS="cgcloud.mesos:$CGCLOUD_PLUGINS"
+
+Log out and back in (or, on OS X, start a new Terminal tab or window).
+
+Verify the installation by running::
+
+ cgcloud list-roles
+
+The output should include the ``mesos-box`` role.
+
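+More precisely, in addition to the roles contributed by ``cgcloud-core`` and any
+other plugins you may have enabled, the listing should contain the three roles
+defined by this plugin::
+
+ mesos-box
+ mesos-master
+ mesos-slave
+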
+Usage
+=====
+
+Create a single ``t2.micro`` box to serve as the template for the cluster
+nodes::
+
+ cgcloud create -IT mesos-box
+
+The ``I`` option stops the box once it is fully set up and takes an image (AMI)
+of it. The ``T`` option terminates the box after that.
+
+Now create a cluster by booting a master and the slaves from that AMI::
+
+ cgcloud create-cluster mesos -s 2 -t m3.large
+
+This will launch a master and two slaves using the ``m3.large`` instance type.
+
+SSH into the master::
+
+ cgcloud ssh mesos-master
+
+... or the first slave::
+
+ cgcloud ssh -o 0 mesos-slave
+
+... or the second slave::
+
+ cgcloud ssh -o 1 mesos-slave
+
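+When you no longer need the cluster, it can be torn down again; the
+``terminate-cluster`` command (the same one used by this plugin's tests) should
+do it::
+
+ cgcloud terminate-cluster mesos
+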
diff --git a/mesos/setup.cfg b/mesos/setup.cfg
new file mode 100644
index 0000000..082e511
--- /dev/null
+++ b/mesos/setup.cfg
@@ -0,0 +1,5 @@
+[pytest]
+# Look for any Python file; the default of test_*.py wouldn't work for us
+python_files=*.py
+# Also run doctests
+addopts = --doctest-modules
diff --git a/mesos/setup.py b/mesos/setup.py
new file mode 100644
index 0000000..34b8e7f
--- /dev/null
+++ b/mesos/setup.py
@@ -0,0 +1,21 @@
+from __future__ import absolute_import
+
+from setuptools import setup, find_packages
+
+from version import cgcloud_version, bd2k_python_lib_dep, fabric_dep
+
+setup( name='cgcloud-mesos',
+ version=cgcloud_version,
+
+ author='Christopher Ketchum',
+ author_email='cketchum at ucsc.edu',
+ url='https://github.com/BD2KGenomics/cgcloud',
+ description='Set up and manage an Apache Mesos cluster in EC2',
+
+ package_dir={ '': 'src' },
+ packages=find_packages( 'src' ),
+ namespace_packages=[ 'cgcloud' ],
+ install_requires=[ 'cgcloud-lib==' + cgcloud_version,
+ 'cgcloud-core==' + cgcloud_version,
+ bd2k_python_lib_dep,
+ fabric_dep ] )
diff --git a/mesos/src/cgcloud/__init__.py b/mesos/src/cgcloud/__init__.py
new file mode 100644
index 0000000..ce648cf
--- /dev/null
+++ b/mesos/src/cgcloud/__init__.py
@@ -0,0 +1 @@
+__import__( 'pkg_resources' ).declare_namespace( __name__ )
diff --git a/mesos/src/cgcloud/mesos/__init__.py b/mesos/src/cgcloud/mesos/__init__.py
new file mode 100644
index 0000000..6badbce
--- /dev/null
+++ b/mesos/src/cgcloud/mesos/__init__.py
@@ -0,0 +1,8 @@
+def roles( ):
+ from cgcloud.mesos.mesos_box import MesosBox, MesosMaster, MesosSlave
+ return sorted( locals( ).values( ), key=lambda cls: cls.__name__ )
+
+
+def cluster_types( ):
+ from cgcloud.mesos.mesos_cluster import MesosCluster
+ return sorted( locals( ).values( ), key=lambda cls: cls.__name__ )
diff --git a/mesos/src/cgcloud/mesos/mesos_box.py b/mesos/src/cgcloud/mesos/mesos_box.py
new file mode 100644
index 0000000..1712334
--- /dev/null
+++ b/mesos/src/cgcloud/mesos/mesos_box.py
@@ -0,0 +1,309 @@
+import logging
+from collections import namedtuple
+from pipes import quote
+
+from bd2k.util.iterables import concat
+from bd2k.util.strings import interpolate as fmt
+from fabric.context_managers import settings
+from fabric.operations import run
+
+from cgcloud.core.box import fabric_task
+from cgcloud.core.cluster import ClusterBox, ClusterLeader, ClusterWorker
+from cgcloud.core.common_iam_policies import ec2_read_only_policy
+from cgcloud.core.generic_boxes import GenericUbuntuTrustyBox
+from cgcloud.core.mesos_box import MesosBox as CoreMesosBox
+from cgcloud.core.ubuntu_box import Python27UpdateUbuntuBox
+from cgcloud.fabric.operations import sudo, remote_open, pip, sudov
+from cgcloud.lib.util import abreviated_snake_case_class_name, heredoc
+
+log = logging.getLogger( __name__ )
+
+user = 'mesosbox'
+
+install_dir = '/opt/mesosbox'
+
+log_dir = '/var/log/mesosbox'
+
+ephemeral_dir = '/mnt/ephemeral'
+
+persistent_dir = '/mnt/persistent'
+
+work_dir = '/var/lib/mesos'
+
+Service = namedtuple( 'Service', [
+ 'init_name',
+ 'description',
+ 'command' ] )
+
+
+def mesos_service( name, *flags ):
+ command = concat( '/usr/sbin/mesos-{name}', '--log_dir={log_dir}/mesos', flags )
+ return Service(
+ init_name='mesosbox-' + name,
+ description=fmt( 'Mesos {name} service' ),
+ command=fmt( ' '.join( command ) ) )
+
+
+mesos_services = dict(
+ master=[ mesos_service( 'master',
+ '--registry=in_memory',
+ # would use "--ip mesos-master" here but that option only supports
+ # IP addresses, not DNS names or /etc/hosts entries
+ '--ip_discovery_command="hostname -i"',
+ '--credentials=/etc/mesos/credentials' ) ],
+ slave=[ mesos_service( 'slave',
+ '--master=mesos-master:5050',
+ '--no-switch_user',
+ '--work_dir=' + work_dir,
+ '--executor_shutdown_grace_period=60secs',
+ # By default Mesos offers the total disk minus what it reserves for
+ # itself, which is half the total disk or 5 GiB, whichever is smaller.
+ # The code below mimics that logic except that it uses available disk
+ # space as opposed to total disk. NB: the default unit is MiB in Mesos.
+ "--resources=disk:$(python -c %s)" % quote( heredoc( """
+ import os
+ df = os.statvfs( "{work_dir}" )
+ free = df.f_frsize * df.f_bavail >> 20
+ print max( 0, free - min( free / 2, 5120 ) )""" ).replace( '\n',
+ ';' ) ),
+ '$(cat /var/lib/mesos/slave_args)' ) ] )
+
+
+class MesosBoxSupport( GenericUbuntuTrustyBox, Python27UpdateUbuntuBox, CoreMesosBox ):
+ """
+ A node in a Mesos cluster. Both slaves and masters are based on this initial setup. Those
+ specific roles are determined at boot time. Worker nodes need to be passed the master's IP
+ and port before starting up.
+ """
+
+ @classmethod
+ def get_role_options( cls ):
+ return super( MesosBoxSupport, cls ).get_role_options( ) + [
+ cls.RoleOption( name='etc_hosts_entries',
+ type=str,
+ repr=str,
+ inherited=True,
+ help="Additional entries for /etc/hosts in the form "
+ "'foo:1.2.3.4,bar:2.3.4.5'" ) ]
+
+ def other_accounts( self ):
+ return super( MesosBoxSupport, self ).other_accounts( ) + [ user ]
+
+ def default_account( self ):
+ return user
+
+ def __init__( self, ctx ):
+ super( MesosBoxSupport, self ).__init__( ctx )
+ self.lazy_dirs = set( )
+
+ def _populate_security_group( self, group_id ):
+ return super( MesosBoxSupport, self )._populate_security_group( group_id ) + [
+ dict( ip_protocol='tcp', from_port=0, to_port=65535,
+ src_security_group_group_id=group_id ),
+ dict( ip_protocol='udp', from_port=0, to_port=65535,
+ src_security_group_group_id=group_id ) ]
+
+ def _get_iam_ec2_role( self ):
+ iam_role_name, policies = super( MesosBoxSupport, self )._get_iam_ec2_role( )
+ iam_role_name += '--' + abreviated_snake_case_class_name( MesosBoxSupport )
+ policies.update( dict(
+ ec2_read_only=ec2_read_only_policy,
+ ec2_mesos_box=dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="*", Action="ec2:CreateTags" ),
+ dict( Effect="Allow", Resource="*", Action="ec2:CreateVolume" ),
+ dict( Effect="Allow", Resource="*", Action="ec2:AttachVolume" ) ] ) ) )
+ return iam_role_name, policies
+
+ def _pre_install_packages( self ):
+ super( MesosBoxSupport, self )._pre_install_packages( )
+ self.__setup_application_user( )
+
+ @fabric_task
+ def __setup_application_user( self ):
+ sudo( fmt( 'useradd '
+ '--home /home/{user} '
+ '--create-home '
+ '--user-group '
+ '--shell /bin/bash {user}' ) )
+
+ def _post_install_packages( self ):
+ super( MesosBoxSupport, self )._post_install_packages( )
+ self._propagate_authorized_keys( user, user )
+ self.__setup_shared_dir( )
+ self.__setup_ssh_config( )
+ self.__create_mesos_keypair( )
+ self.__setup_mesos( )
+ self.__install_tools( )
+
+ def _shared_dir( self ):
+ return '/home/%s/shared' % self.default_account( )
+
+ @fabric_task
+ def __setup_shared_dir( self ):
+ sudov( 'install', '-d', self._shared_dir( ), '-m', '700', '-o', self.default_account( ) )
+
+ @fabric_task
+ def __setup_ssh_config( self ):
+ with remote_open( '/etc/ssh/ssh_config', use_sudo=True ) as f:
+ f.write( heredoc( """
+ Host spark-master
+ CheckHostIP no
+ HashKnownHosts no""" ) )
+
+ @fabric_task( user=user )
+ def __create_mesos_keypair( self ):
+ self._provide_imported_keypair( ec2_keypair_name=self.__ec2_keypair_name( self.ctx ),
+ private_key_path=fmt( "/home/{user}/.ssh/id_rsa" ),
+ overwrite_ec2=True )
+ # This trick allows us to roam freely within the cluster as the app user while still
+ # being able to have keypairs in authorized_keys managed by cgcloudagent such that
+ # external users can log in as the app user, too. The trick depends on AuthorizedKeysFile
+ # defaulting to or being set to .ssh/authorized_keys and .ssh/authorized_keys2 in sshd_config.
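+ # For illustration only (nothing here changes sshd_config), such a line would look like:
+ #
+ #     AuthorizedKeysFile .ssh/authorized_keys .ssh/authorized_keys2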
+ run( "cd .ssh && cat id_rsa.pub >> authorized_keys2" )
+
+ def __ec2_keypair_name( self, ctx ):
+ return user + '@' + ctx.to_aws_name( self.role( ) )
+
+ @fabric_task
+ def __setup_mesos( self ):
+ sudo( "rm /etc/init/mesos-{master,slave}.conf" )
+ self._lazy_mkdir( log_dir, 'mesos', persistent=False )
+ self._lazy_mkdir( '/var/lib', 'mesos', persistent=True )
+ self.__prepare_credentials( )
+ self.__register_upstart_jobs( mesos_services )
+ self._post_install_mesos( )
+
+ def _post_install_mesos( self ):
+ pass
+
+ def __prepare_credentials( self ):
+ # Create the credentials file and transfer ownership to mesosbox
+ sudo( 'mkdir -p /etc/mesos' )
+ sudo( 'echo toil liot > /etc/mesos/credentials' )
+ sudo( 'chown mesosbox:mesosbox /etc/mesos/credentials' )
+
+ @fabric_task
+ def __install_tools( self ):
+ """
+ Installs the mesos-master-discovery init script and its companion mesos-tools. The latter
+ is a Python package distribution that's included in cgcloud-mesos as a resource. This is
+ in contrast to the cgcloud agent, which is a standalone distribution.
+ """
+ tools_dir = install_dir + '/tools'
+ admin = self.admin_account( )
+ sudo( fmt( 'mkdir -p {tools_dir}' ) )
+ sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
+ run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
+ run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )
+
+ with settings( forward_agent=True ):
+ with self._project_artifacts( 'mesos-tools' ) as artifacts:
+ pip( use_sudo=True,
+ path=tools_dir + '/bin/pip',
+ args=concat( 'install', artifacts ) )
+ sudo( fmt( 'chown -R root:root {tools_dir}' ) )
+
+ mesos_tools = "MesosTools(**%r)" % dict( user=user,
+ shared_dir=self._shared_dir( ),
+ ephemeral_dir=ephemeral_dir,
+ persistent_dir=persistent_dir,
+ lazy_dirs=self.lazy_dirs )
+
+ self.lazy_dirs = None # make sure it can't be used anymore once we are done with it
+
+ self._register_init_script(
+ "mesosbox",
+ heredoc( """
+ description "Mesos master discovery"
+ console log
+ start on (local-filesystems and net-device-up IFACE!=lo)
+ stop on runlevel [!2345]
+ pre-start script
+ for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
+ import logging
+ logging.basicConfig( level=logging.INFO )
+ from cgcloud.mesos_tools import MesosTools
+ mesos_tools = {mesos_tools}
+ mesos_tools.start()
+ END
+ then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1
+ end script
+ post-stop script
+ {tools_dir}/bin/python2.7 - <<END
+ import logging
+ logging.basicConfig( level=logging.INFO )
+ from cgcloud.mesos_tools import MesosTools
+ mesos_tools = {mesos_tools}
+ mesos_tools.stop()
+ END
+ end script""" ) )
+ # Explicitly start the mesosbox service to achieve creation of lazy directories right
+ # now. This makes a generic mesosbox useful for ad-hoc tests that involve Mesos and Toil.
+ self._run_init_script( 'mesosbox' )
+
+ @fabric_task
+ def _lazy_mkdir( self, parent, name, persistent=False ):
+ """
+ _lazy_mkdir( '/foo', 'dir', True ) creates /foo/dir now and ensures that
+ /mnt/persistent/foo/dir is created and bind-mounted into /foo/dir when the box starts.
+ Likewise, _lazy_mkdir( '/foo', 'dir', False ) creates /foo/dir now and ensures that
+ /mnt/ephemeral/foo/dir is created and bind-mounted into /foo/dir when the box starts.
+
+ Note that at start-up time, /mnt/persistent may be reassigned to /mnt/ephemeral if no
+ EBS volume is mounted at /mnt/persistent.
+
+ _lazy_mkdir( '/foo', 'dir', None ) will look up an instance tag named 'persist_foo_dir'
+ when the box starts and then behave like _lazy_mkdir( '/foo', 'dir', True ) if that tag's
+ value is 'True', or _lazy_mkdir( '/foo', 'dir', False ) if that tag's value is False.
+ """
+ assert self.lazy_dirs is not None
+ assert '/' not in name
+ assert parent.startswith( '/' )
+ for location in (persistent_dir, ephemeral_dir):
+ assert location.startswith( '/' )
+ assert not location.startswith( parent ) and not parent.startswith( location )
+ logical_path = parent + '/' + name
+ sudo( 'mkdir -p "%s"' % logical_path )
+ self.lazy_dirs.add( (parent, name, persistent) )
+ return logical_path
+
+ def __register_upstart_jobs( self, service_map ):
+ for node_type, services in service_map.iteritems( ):
+ start_on = "mesosbox-start-" + node_type
+ for service in services:
+ self._register_init_script(
+ service.init_name,
+ heredoc( """
+ description "{service.description}"
+ console log
+ start on {start_on}
+ stop on runlevel [016]
+ respawn
+ umask 022
+ limit nofile 8000 8192
+ setuid {user}
+ setgid {user}
+ env USER={user}
+ exec {service.command}""" ) )
+ start_on = "started " + service.init_name
+
+
+class MesosBox( MesosBoxSupport, ClusterBox ):
+ """
+ A node in a Mesos cluster; used only to create an image for master and worker boxes
+ """
+ pass
+
+
+class MesosMaster( MesosBox, ClusterLeader ):
+ """
+ The master of a cluster of boxes created from a mesos-box image
+ """
+ pass
+
+
+class MesosSlave( MesosBox, ClusterWorker ):
+ """
+ A slave in a cluster of boxes created from a mesos-box image
+ """
+ pass
diff --git a/mesos/src/cgcloud/mesos/mesos_cluster.py b/mesos/src/cgcloud/mesos/mesos_cluster.py
new file mode 100644
index 0000000..857055a
--- /dev/null
+++ b/mesos/src/cgcloud/mesos/mesos_cluster.py
@@ -0,0 +1,12 @@
+from cgcloud.core.cluster import Cluster
+from cgcloud.mesos.mesos_box import MesosMaster, MesosSlave
+
+
+class MesosCluster( Cluster ):
+ @property
+ def worker_role( self ):
+ return MesosSlave
+
+ @property
+ def leader_role( self ):
+ return MesosMaster
diff --git a/mesos/src/cgcloud/mesos/test/__init__.py b/mesos/src/cgcloud/mesos/test/__init__.py
new file mode 100644
index 0000000..5da04d1
--- /dev/null
+++ b/mesos/src/cgcloud/mesos/test/__init__.py
@@ -0,0 +1,27 @@
+import time
+
+from cgcloud.core.test import CoreTestCase
+from cgcloud.mesos.mesos_box import log_dir
+
+
+class MesosTestCase( CoreTestCase ):
+ """
+ Common functionality between Toil and Mesos tests
+ """
+
+ def _wait_for_mesos_slaves( self, master, num_slaves ):
+ delay = 5
+ expiration = time.time( ) + 10 * 60
+ commands = [
+ 'test "$(grep -c \'Registering slave at\' %s/mesos/mesos-master.INFO)" = "%s"' % (
+ log_dir, num_slaves) ]
+ for command in commands:
+ while True:
+ try:
+ self._ssh( master, command )
+ except SystemExit:
+ if time.time( ) + delay >= expiration:
+ self.fail( "Cluster didn't come up in time" )
+ time.sleep( delay )
+ else:
+ break
diff --git a/mesos/src/cgcloud/mesos/test/conftest.py b/mesos/src/cgcloud/mesos/test/conftest.py
new file mode 100644
index 0000000..862a5a2
--- /dev/null
+++ b/mesos/src/cgcloud/mesos/test/conftest.py
@@ -0,0 +1 @@
+from cgcloud.core.test.conftest import pytest_configure
diff --git a/mesos/src/cgcloud/mesos/test/test_mesos.py b/mesos/src/cgcloud/mesos/test/test_mesos.py
new file mode 100644
index 0000000..0a47fcb
--- /dev/null
+++ b/mesos/src/cgcloud/mesos/test/test_mesos.py
@@ -0,0 +1,67 @@
+import os
+import logging
+
+from cgcloud.mesos.mesos_box import MesosBox, MesosMaster, MesosSlave
+from cgcloud.mesos.test import MesosTestCase
+
+log = logging.getLogger( __name__ )
+
+master = MesosMaster.role( )
+slave = MesosSlave.role( )
+node = MesosBox.role( )
+
+num_slaves = 2
+
+
+class MesosClusterTests( MesosTestCase ):
+ """
+ Covers the creation of a Mesos cluster and running a simple script on it.
+ """
+
+ cleanup = True
+ create_image = True
+
+ @classmethod
+ def setUpClass( cls ):
+ os.environ[ 'CGCLOUD_PLUGINS' ] = 'cgcloud.mesos'
+ super( MesosClusterTests, cls ).setUpClass( )
+ if cls.create_image:
+ cls._cgcloud( 'create', node, '-I', '-T' )
+
+ @classmethod
+ def tearDownClass( cls ):
+ if cls.cleanup and cls.create_image:
+ cls._cgcloud( 'delete-image', node )
+ super( MesosClusterTests, cls ).tearDownClass( )
+
+ def test_mesos( self ):
+ self._create_cluster( )
+ try:
+ self._assert_remote_failure( master )
+ self._wait_for_mesos_slaves( master, num_slaves )
+ self._test_mesos( )
+ finally:
+ if self.cleanup:
+ self._terminate_cluster( )
+
+ def _create_cluster( self, *args ):
+ self._cgcloud( 'create-cluster', 'mesos', '-s', str( num_slaves ), *args )
+
+ def _terminate_cluster( self ):
+ self._cgcloud( 'terminate-cluster', 'mesos' )
+
+ def _test_mesos( self ):
+ for i in xrange( num_slaves ):
+ self._ssh( slave, 'test ! -f cgcloud_test.tmp', ordinal=i )
+ # This is probabilistic: we hope that if we do ten times as many tasks as there are nodes,
+ # chances are that we hit each node at least once.
+ num_tasks = num_slaves * 10
+ for i in xrange( num_tasks ):
+ self._ssh( master, 'mesos execute '
+ '--master=mesos-master:5050 '
+ '--name=cgcloud_test '
+ '--command="touch $(pwd)/cgcloud_test.tmp" '
+ '>> mesos_execute.out' )
+ self._ssh( master, 'test "$(grep -c TASK_FINISHED mesos_execute.out)" = %i' % num_tasks )
+ for i in xrange( num_slaves ):
+ self._ssh( slave, 'test -f cgcloud_test.tmp', ordinal=i )
diff --git a/run_tests.py b/run_tests.py
new file mode 100644
index 0000000..d1b80b3
--- /dev/null
+++ b/run_tests.py
@@ -0,0 +1,93 @@
+import glob
+import itertools
+import logging
+import os
+import subprocess
+import sys
+
+log = logging.getLogger( __name__ )
+
+# A "keyword" is an argument to pytest's -k option. It acts as a selector for tests. Each of the
+# keywords in the list below will be run concurrently. Once they are done, everything else will
+# be run sequentially. Please note that keywords are match as substrings: Foo will match Foo,
+# FooBar and BarFoo.
+#
+try:
+ if not os.getcwd( ) in sys.path:
+ sys.path.append( os.getcwd( ) )
+ from tests import parallelizable_keywords
+except ImportError:
+ parallelizable_keywords = [ ]
+
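+# For example, a tests.py in the current working directory (not shown here; the keywords below
+# are purely illustrative) could enable concurrent runs of the cluster test suites like so:
+#
+#     parallelizable_keywords = [ 'MesosClusterTests', 'SparkClusterTests' ]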
+
+def run_tests( index, keywords=None, args=None ):
+ cmd = [ sys.executable, '-m', 'pytest', '-vv',
+ '--junitxml', 'nosetests-%s.xml' % index ]
+ if keywords:
+ cmd.extend( [ '-k', keywords ] )
+ if args:
+ cmd.extend( args )
+ log.info( 'Running %r', cmd )
+ return subprocess.Popen( cmd )
+
+
+def main( args ):
+ for name in glob.glob( 'nosetests-*.xml' ):
+ os.unlink( name )
+ num_failures = 0
+ index = itertools.count( )
+ pids = set( )
+ # PyTest thinks that absence of tests constitutes an error.
+ # Luckily it has a distinct status code (5) for that.
+ ok_statuses = (0, 5)
+ try:
+ for keyword in parallelizable_keywords:
+ process = run_tests( index=str( next( index ) ),
+ keywords=keyword,
+ args=args )
+ pids.add( process.pid )
+ while pids:
+ pid, status = os.wait( )
+ pids.remove( pid )
+ if os.WIFEXITED( status ):
+ status = os.WEXITSTATUS( status )
+ if status not in ok_statuses:
+ num_failures += 1
+ else:
+ num_failures += 1
+ except:
+ for pid in pids:
+ os.kill( pid, 15 )
+ raise
+
+ if parallelizable_keywords:
+ everything_else = ' and '.join( 'not ' + keyword for keyword in parallelizable_keywords )
+ else:
+ everything_else = None
+
+ process = run_tests( index=str( next( index ) ),
+ keywords=everything_else,
+ args=args )
+ if process.wait( ) not in ok_statuses:
+ num_failures += 1
+
+ import xml.etree.ElementTree as ET
+ testsuites = ET.Element( 'testsuites' )
+ for name in glob.glob( 'nosetests-*.xml' ):
+ log.info( "Reading test report %s", name )
+ tree = ET.parse( name )
+ testsuites.append( tree.getroot( ) )
+ os.unlink( name )
+ name = 'nosetests.xml'
+ log.info( 'Writing aggregate test report %s', name )
+ ET.ElementTree( testsuites ).write( name, xml_declaration=True )
+
+ if num_failures:
+ log.error( '%i out of %i child processes failed', num_failures, next( index ) )
+
+ return num_failures
+
+
+if __name__ == '__main__':
+ logging.basicConfig( level=logging.INFO )
+ sys.exit( main( sys.argv[ 1: ] ) )
diff --git a/spark-tools/.gitignore b/spark-tools/.gitignore
new file mode 100644
index 0000000..0ff20ef
--- /dev/null
+++ b/spark-tools/.gitignore
@@ -0,0 +1,6 @@
+/build
+/dist
+*.egg-info
+*.pyc
+/MANIFEST.in
+/version.py
diff --git a/spark-tools/setup.cfg b/spark-tools/setup.cfg
new file mode 100644
index 0000000..082e511
--- /dev/null
+++ b/spark-tools/setup.cfg
@@ -0,0 +1,5 @@
+[pytest]
+# Look for any Python file; the default of test_*.py wouldn't work for us
+python_files=*.py
+# Also run doctests
+addopts = --doctest-modules
diff --git a/spark-tools/setup.py b/spark-tools/setup.py
new file mode 100644
index 0000000..20b752b
--- /dev/null
+++ b/spark-tools/setup.py
@@ -0,0 +1,22 @@
+from __future__ import absolute_import
+
+from setuptools import setup, find_packages
+
+from version import cgcloud_version, bd2k_python_lib_dep, boto_dep
+
+setup(
+ name="cgcloud-spark-tools",
+ version=cgcloud_version,
+
+ author='Hannes Schmidt',
+ author_email='hannes at ucsc.edu',
+ url='https://github.com/BD2KGenomics/cgcloud',
+ description='Set up and manage an Apache Spark cluster in EC2',
+
+ package_dir={ '': 'src' },
+ packages=find_packages( 'src' ),
+ namespace_packages=[ 'cgcloud' ],
+ install_requires=[
+ bd2k_python_lib_dep,
+ 'cgcloud-lib==' + cgcloud_version,
+ boto_dep ] )
diff --git a/spark-tools/src/cgcloud/__init__.py b/spark-tools/src/cgcloud/__init__.py
new file mode 100644
index 0000000..ce648cf
--- /dev/null
+++ b/spark-tools/src/cgcloud/__init__.py
@@ -0,0 +1 @@
+__import__( 'pkg_resources' ).declare_namespace( __name__ )
diff --git a/spark-tools/src/cgcloud/spark_tools/__init__.py b/spark-tools/src/cgcloud/spark_tools/__init__.py
new file mode 100644
index 0000000..aa01069
--- /dev/null
+++ b/spark-tools/src/cgcloud/spark_tools/__init__.py
@@ -0,0 +1,482 @@
+import errno
+import fcntl
+import itertools
+import logging
+import os
+import re
+import socket
+import stat
+import time
+from collections import OrderedDict
+from grp import getgrnam
+from pwd import getpwnam
+from subprocess import check_call, check_output, CalledProcessError, STDOUT
+from urllib2 import urlopen
+
+import boto.ec2
+from bd2k.util import memoize, less_strict_bool
+from bd2k.util.files import mkdir_p
+from boto.ec2.instance import Instance
+
+from cgcloud.lib.ec2 import EC2VolumeHelper
+from cgcloud.lib.util import volume_label_hash
+
+initctl = '/sbin/initctl'
+
+sudo = '/usr/bin/sudo'
+
+log = logging.getLogger( __name__ )
+
+
+class SparkTools( object ):
+ """
+ Tools for master discovery and managing the slaves file for Hadoop and Spark. All of this
+ happens at boot time when a node (master or slave) starts up as part of a cluster.
+
+ Master discovery works as follows: All instances in a Spark cluster are tagged with the
+ instance ID of the master. Each instance will look up the private IP of 1) the master
+ instance using the EC2 API (via boto) and 2) itself using the instance metadata endpoint. An
+ entry for "spark-master" will be added to /etc/hosts. All configuration files use these names
+ instead of hard-coding the IPs. This is all that's needed to boot a working cluster.
+
+ In order to facilitate the start-all.sh and stop-all.sh scripts in Hadoop and Spark,
+ the slaves file needs to be populated as well. The master seeds the slaves file by listing
+ all instances tagged with its own instance ID. Additionally, the slaves ssh into the master
+ to have their own IP added to the master's slaves file, thereby enabling the dynamic addition
+ of slaves to a cluster. Both actions are managed by the spark-manage-slaves script.
+
+ The slaves file in spark/conf and hadoop/etc/hadoop is actually a symlink to a file in /tmp
+ whose name ends in the IP of the master. This is to ensure that a fresh slaves file is used
+ for every incarnation of the AMI and after each restart of the master instance.
+
+ Optionally, a persistent EBS volume is attached, formatted (if needed) and mounted.
+ """
+
+ def __init__( self, user, shared_dir, install_dir, ephemeral_dir, persistent_dir, lazy_dirs ):
+ """
+ :param user: the user the services run as
+ :param install_dir: root installation directory, e.g. /opt
+ """
+ super( SparkTools, self ).__init__( )
+ self.user = user
+ self.shared_dir = shared_dir
+ self.install_dir = install_dir
+ self.ephemeral_dir = ephemeral_dir
+ self.persistent_dir = persistent_dir
+ self.uid = getpwnam( self.user ).pw_uid
+ self.gid = getgrnam( self.user ).gr_gid
+ self.lazy_dirs = lazy_dirs
+ self._patch_boto_config( )
+
+ def _patch_boto_config( self ):
+ from boto import config
+ def inject_default( name, default ):
+ section = 'Boto'
+ value = config.get( section, name )
+
+ if value != default:
+ if not config.has_section( section ):
+ config.add_section( section )
+ config.set( section, name, default )
+
+ # Override the 5xx retry limit default of 6
+ inject_default( 'num_retries', '12' )
+
+ def start( self ):
+ """
+ Invoked at boot time or when the sparkbox service is started.
+ """
+ while not os.path.exists( '/tmp/cloud-init.done' ):
+ log.info( "Waiting for cloud-init to finish ..." )
+ time.sleep( 1 )
+ log.info( "Starting sparkbox" )
+ self.__setup_etc_hosts( )
+ self.__mount_ebs_volume( )
+ self.__create_lazy_dirs( )
+
+ if self.master_ip == self.node_ip:
+ node_type = 'master'
+ self.__publish_host_key( )
+ self.__prepare_slaves_file( )
+ self.__format_namenode( )
+ else:
+ node_type = 'slave'
+ self.__get_master_host_key( )
+ self.__wait_for_master_ssh( )
+ self.__register_with_master( )
+ if self.shared_dir:
+ self._copy_dir_from_master( self.shared_dir )
+
+ log.info( "Starting %s services" % node_type )
+ check_call( [ initctl, 'emit', 'sparkbox-start-%s' % node_type ] )
+
+ def stop( self ):
+ """
+ Invoked at shutdown time or when the sparkbox service is stopped.
+ """
+ log.info( "Stopping sparkbox" )
+ self.__patch_etc_hosts( { 'spark-master': None } )
+
+ def manage_slaves( self, slaves_to_add=None ):
+ """
+ This method is invoked when the sparkbox-manage-slaves script is run. It has two modes:
+ the first mode initializes the slaves file when the master starts up. All currently
+ running slaves will be added to the slaves file. The second mode adds specific slaves to
+ the slaves file, typically just one. This happens when the sparkbox-manage-slaves script is
+ invoked from a slave on the master via ssh.
+
+ :param slaves_to_add: an iterable yielding strings containing the IP address of a slave.
+ The format is IP : SSH_KEY_ALGO : SSH_HOST_KEY without the spaces. If this parameter is
+ empty or None, all slaves belonging to this master will be listed via EC2 and then added.
+ """
+ log.info( "Managing slaves file" )
+ slaves_path = "/tmp/slaves-" + self.master_ip
+ with open( slaves_path, 'a+' ) as f:
+ fcntl.flock( f, fcntl.LOCK_EX )
+ if slaves_to_add:
+ log.info( "Adding slaves: %r", slaves_to_add )
+ slaves = set( _.strip( ) for _ in f.readlines( ) )
+ slaves.update( _.split( ':' )[ 0 ] for _ in slaves_to_add )
+ else:
+ log.info( "Initializing slaves file" )
+ reservations = self.ec2.get_all_reservations(
+ filters={ 'tag:leader_instance_id': self.master_id } )
+ slaves = set( i.private_ip_address
+ for r in reservations
+ for i in r.instances if i.id != self.master_id )
+ log.info( "Found %i slave.", len( slaves ) )
+ if '' in slaves: slaves.remove( '' )
+ slaves = list( slaves )
+ slaves.sort( )
+ slaves.append( '' )
+ f.seek( 0 )
+ f.truncate( 0 )
+ f.write( '\n'.join( slaves ) )
+ if slaves_to_add:
+ log.info( "Adding host keys for slaves" )
+ self.__add_host_keys( slaves_to_add )
+
+ @classmethod
+ @memoize
+ def instance_data( cls, path ):
+ return urlopen( 'http://169.254.169.254/latest/' + path ).read( )
+
+ @classmethod
+ @memoize
+ def meta_data( cls, path ):
+ return cls.instance_data( 'meta-data/' + path )
+
+ @classmethod
+ @memoize
+ def user_data( cls ):
+ user_data = cls.instance_data( 'user-data' )
+ log.info( "User data is '%s'", user_data )
+ return user_data
+
+ @property
+ @memoize
+ def node_ip( self ):
+ ip = self.meta_data( 'local-ipv4' )
+ log.info( "Local IP is '%s'", ip )
+ return ip
+
+ @property
+ @memoize
+ def instance_id( self ):
+ instance_id = self.meta_data( 'instance-id' )
+ log.info( "Instance ID is '%s'", instance_id )
+ return instance_id
+
+ @property
+ @memoize
+ def availability_zone( self ):
+ zone = self.meta_data( 'placement/availability-zone' )
+ log.info( "Availability zone is '%s'", zone )
+ return zone
+
+ @property
+ @memoize
+ def region( self ):
+ m = re.match( r'^([a-z]{2}-[a-z]+-[1-9][0-9]*)([a-z])$', self.availability_zone )
+ assert m
+ region = m.group( 1 )
+ log.info( "Region is '%s'", region )
+ return region
+
+ @property
+ @memoize
+ def ec2( self ):
+ return boto.ec2.connect_to_region( self.region )
+
+ @property
+ @memoize
+ def master_id( self ):
+ master_id = self.instance_tag( 'leader_instance_id' )
+ if not master_id:
+ raise RuntimeError( "Instance not tagged with master's instance ID" )
+ log.info( "Master's instance ID is '%s'", master_id )
+ return master_id
+
+ @property
+ @memoize
+ def master_ip( self ):
+ if self.master_id == self.instance_id:
+ master_ip = self.node_ip
+ log.info( "I am the master" )
+ else:
+ log.info( "I am a slave" )
+ master_ip = self.master_instance.private_ip_address
+ log.info( "Master IP is '%s'", master_ip )
+ return master_ip
+
+ @property
+ @memoize
+ def is_spot_instance( self ):
+ result = bool( self.this_instance.spot_instance_request_id )
+ log.info( "I am %s spot instance", "a" if result else "not a" )
+ return result
+
+ @memoize
+ def instance( self, instance_id ):
+ """:rtype: Instance"""
+ instances = self.ec2.get_only_instances( instance_ids=[ instance_id ] )
+ assert len( instances ) == 1
+ instance = instances[ 0 ]
+ return instance
+
+ @property
+ @memoize
+ def this_instance( self ):
+ """:rtype: Instance"""
+ instance = self.instance( self.instance_id )
+ log.info( "I am running on %r", instance.__dict__ )
+ return instance
+
+ @property
+ @memoize
+ def master_instance( self ):
+ """:rtype: Instance"""
+ return self.instance( self.master_id )
+
+ @memoize
+ def instance_tag( self, key ):
+ """:rtype: str|None"""
+ return self.this_instance.tags.get( key )
+
+ def __mount_ebs_volume( self ):
+ """
+ Attach, format (if necessary) and mount the EBS volume with the same cluster ordinal as
+ this node.
+ """
+ ebs_volume_size = self.instance_tag( 'ebs_volume_size' ) or '0'
+ ebs_volume_size = int( ebs_volume_size )
+ if ebs_volume_size:
+ instance_name = self.instance_tag( 'Name' )
+ cluster_ordinal = int( self.instance_tag( 'cluster_ordinal' ) )
+ volume_name = '%s__%d' % (instance_name, cluster_ordinal)
+ volume = EC2VolumeHelper( ec2=self.ec2,
+ availability_zone=self.availability_zone,
+ name=volume_name,
+ size=ebs_volume_size,
+ volume_type="gp2" )
+ # TODO: handle case where volume is already attached
+ device_ext = '/dev/sdf'
+ device = '/dev/xvdf'
+ volume.attach( self.instance_id, device_ext )
+
+ # Wait for the device node to appear and make sure it's a block device
+ while True:
+ try:
+ assert stat.S_ISBLK( os.stat( device ).st_mode )
+ break
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ time.sleep( 1 )
+ else:
+ raise
+
+ # Only format empty volumes
+ volume_label = volume_label_hash( volume_name )
+ if check_output( [ 'file', '-sL', device ] ).strip( ) == device + ': data':
+ check_call( [ 'mkfs', '-t', 'ext4', device ] )
+ check_call( [ 'e2label', device, volume_label ] )
+ else:
+ # If the volume is not empty, verify the file system label
+ actual_label = check_output( [ 'e2label', device ] ).strip( )
+ if actual_label != volume_label:
+ raise AssertionError(
+ "Expected volume label '%s' (derived from '%s') but got '%s'" %
+ (volume_label, volume_name, actual_label) )
+ current_mount_point = self.__mount_point( device )
+ if current_mount_point is None:
+ mkdir_p( self.persistent_dir )
+ check_call( [ 'mount', device, self.persistent_dir ] )
+ elif current_mount_point == self.persistent_dir:
+ pass
+ else:
+ raise RuntimeError(
+ "Can't mount device %s on '%s' since it is already mounted on '%s'" % (
+ device, self.persistent_dir, current_mount_point) )
+ else:
+ # No persistent volume is attached and the root volume is off limits, so we will need
+ # to place persistent data on the ephemeral volume.
+ self.persistent_dir = self.ephemeral_dir
+
+ def __get_master_host_key( self ):
+ log.info( "Getting master's host key" )
+ master_host_key = self.master_instance.tags.get( 'ssh_host_key' )
+ if master_host_key:
+ self.__add_host_keys( [ 'spark-master:' + master_host_key ] )
+ else:
+ log.warn( "Could not get master's host key" )
+
+ def __add_host_keys( self, host_keys, globally=None ):
+ if globally is None:
+ globally = os.geteuid( ) == 0
+ if globally:
+ known_hosts_path = '/etc/ssh/ssh_known_hosts'
+ else:
+ known_hosts_path = os.path.expanduser( '~/.ssh/known_hosts' )
+ with open( known_hosts_path, 'a+' ) as f:
+ fcntl.flock( f, fcntl.LOCK_EX )
+ keys = set( _.strip( ) for _ in f.readlines( ) )
+ keys.update( ' '.join( _.split( ':' ) ) for _ in host_keys )
+ if '' in keys: keys.remove( '' )
+ keys = list( keys )
+ keys.sort( )
+ keys.append( '' )
+ f.seek( 0 )
+ f.truncate( 0 )
+ f.write( '\n'.join( keys ) )
+
+ def __wait_for_master_ssh( self ):
+ """
+ Wait until the master is accessible via SSH.
+ """
+ for _ in itertools.count( ):
+ s = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
+ try:
+ s.settimeout( 5 )
+ s.connect( ('spark-master', 22) )
+ return
+ except socket.error:
+ pass
+ finally:
+ s.close( )
+
+ def _copy_dir_from_master( self, path ):
+ log.info( "Copying %s from master" % path )
+ if not path.endswith( '/' ):
+ path += '/'
+ for tries in range( 5 ):
+ try:
+ check_call( [ sudo, '-u', self.user,
+ 'rsync', '-av', 'spark-master:' + path, path ] )
+ except CalledProcessError as e:
+ log.warn( "rsync returned %i, retrying in 5s", e.returncode )
+ time.sleep( 5 )
+ else:
+ return
+ raise RuntimeError( "Failed to copy %s from master" )
+
+ def __register_with_master( self ):
+ log.info( "Registering with master" )
+ for tries in range( 5 ):
+ try:
+ check_call(
+ [ sudo, '-u', self.user, 'ssh', 'spark-master', 'sparkbox-manage-slaves',
+ self.node_ip + ":" + self.__get_host_key( ) ] )
+ except CalledProcessError as e:
+ log.warn( "rsync returned %i, retrying in 5s", e.returncode )
+ time.sleep( 5 )
+ else:
+ return
+ raise RuntimeError( "Failed to register with master" )
+
+ def __get_host_key( self ):
+ with open( '/etc/ssh/ssh_host_ecdsa_key.pub' ) as f:
+ return ':'.join( f.read( ).split( )[ :2 ] )
+
+ def __publish_host_key( self ):
+ master_host_key = self.__get_host_key( )
+ self.ec2.create_tags( [ self.master_id ], dict( ssh_host_key=master_host_key ) )
+
+ def __create_lazy_dirs( self ):
+ log.info( "Bind-mounting directory structure" )
+ for (parent, name, persistent) in self.lazy_dirs:
+ assert parent[ 0 ] == os.path.sep
+ logical_path = os.path.join( parent, name )
+ if persistent is None:
+ tag = 'persist' + logical_path.replace( os.path.sep, '_' )
+ persistent = less_strict_bool( self.instance_tag( tag ) )
+ location = self.persistent_dir if persistent else self.ephemeral_dir
+ physical_path = os.path.join( location, parent[ 1: ], name )
+ mkdir_p( physical_path )
+ os.chown( physical_path, self.uid, self.gid )
+ logical_path = os.path.join( parent, name )
+ check_call( [ 'mount', '--bind', physical_path, logical_path ] )
+
+ def __prepare_slaves_file( self ):
+ log.info( "Preparing slaves file" )
+ tmp_slaves = "/tmp/slaves-" + self.master_ip
+ open( tmp_slaves, "a" ).close( )
+ os.chown( tmp_slaves, self.uid, self.gid )
+ self.__symlink( self.install_dir + "/hadoop/etc/hadoop/slaves", tmp_slaves )
+ self.__symlink( self.install_dir + "/spark/conf/slaves", tmp_slaves )
+
+ def __format_namenode( self ):
+ log.info( "Formatting namenode" )
+ try:
+ check_output( [ 'sudo', '-u', self.user,
+ self.install_dir + '/hadoop/bin/hdfs', 'namenode',
+ '-format',
+ '-nonInteractive' ], stderr=STDOUT )
+ except CalledProcessError as e:
+ if e.returncode == 1 and 'data appears to exist in Storage Directory' in e.output:
+ pass
+ else:
+ raise
+
+ def __setup_etc_hosts( self ):
+ hosts = self.instance_tag( 'etc_hosts_entries' ) or ""
+ hosts = parse_etc_hosts_entries( hosts )
+ hosts[ 'spark-master' ] = self.master_ip
+ self.__patch_etc_hosts( hosts )
+
+ def __patch_etc_hosts( self, hosts ):
+ log.info( "Patching /etc/host" )
+ # FIXME: The handling of /etc/hosts isn't atomic
+ with open( '/etc/hosts', 'r+' ) as etc_hosts:
+ lines = [ line
+ for line in etc_hosts.readlines( )
+ if not any( host in line for host in hosts.iterkeys( ) ) ]
+ for host, ip in hosts.iteritems( ):
+ if ip: lines.append( "%s %s\n" % (ip, host) )
+ etc_hosts.seek( 0 )
+ etc_hosts.truncate( 0 )
+ etc_hosts.writelines( lines )
+
+ def __symlink( self, symlink, target ):
+ if os.path.lexists( symlink ): os.unlink( symlink )
+ os.symlink( target, symlink )
+
+ def __mount_point( self, device ):
+ with open( '/proc/mounts' ) as f:
+ for line in f:
+ line = line.split( )
+ if line[ 0 ] == device:
+ return line[ 1 ]
+ return None
+
+def parse_etc_hosts_entries( hosts ):
+ """
+ >>> parse_etc_hosts_entries("").items()
+ []
+ >>> parse_etc_hosts_entries("foo:1.2.3.4").items()
+ [('foo', '1.2.3.4')]
+ >>> parse_etc_hosts_entries(" foo : 1.2.3.4 , bar : 2.3.4.5 ").items()
+ [('foo', '1.2.3.4'), ('bar', '2.3.4.5')]
+ """
+ return OrderedDict( (name.strip( ), ip.strip( ))
+ for name, ip in (entry.split( ':', 1 )
+ for entry in hosts.split( ',' ) if entry) )
diff --git a/spark/.gitignore b/spark/.gitignore
new file mode 100644
index 0000000..0ff20ef
--- /dev/null
+++ b/spark/.gitignore
@@ -0,0 +1,6 @@
+/build
+/dist
+*.egg-info
+*.pyc
+/MANIFEST.in
+/version.py
diff --git a/spark/README.rst b/spark/README.rst
new file mode 100644
index 0000000..7442791
--- /dev/null
+++ b/spark/README.rst
@@ -0,0 +1,108 @@
+The CGCloud plugin for Spark lets you set up a fully configured Apache
+Spark cluster in EC2 in just minutes, regardless of the number of nodes. While
+Apache Spark already comes with a script called ``spark-ec2`` that lets you
+build a cluster in EC2, CGCloud Spark differs from ``spark-ec2`` in the
+following ways:
+
+* Neither Tachyon nor YARN is included
+
+* Setup time does not scale linearly with the number of nodes. Setting up a
+ 100-node cluster takes just as long as setting up a 10-node cluster (2-3 min,
+ as opposed to 45 min with ``spark-ec2``). This is made possible by baking all
+ required software into a single AMI. All slave nodes boot up concurrently and
+ join the cluster autonomously in just a few minutes.
+
+* Unlike with ``spark-ec2``, the cluster can be stopped and started via the EC2
+ API or the EC2 console, without involvement of cgcloud.
+
+* The Spark services (master and worker) run as an unprivileged user, not root
+ as with spark-ec2. Ditto for the HDFS services (namenode, datanode and
+ secondarynamenode).
+
+* The Spark and Hadoop services are started automatically as the instance boots
+ up, via a regular init script.
+
+* Nodes can be added easily, simply by booting up new instances from the AMI.
+ They will join the cluster automatically. HDFS may have to be rebalanced
+ after that.
+
+* You can customize the AMI that cluster nodes boot from by subclassing the
+ ``SparkMaster`` and ``SparkSlave`` classes (see the sketch after this list).
+
+* CGCloud Spark uses the CGCloud Agent, which takes care of maintaining a list
+ of authorized keypairs on each node.
+
+* CGCloud Spark is based on the official Ubuntu Trusty 14.04 LTS, not the
+ Amazon Linux AMI.
+
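+The customization mentioned above could look roughly like the following sketch.
+The class name ``CustomSparkMaster`` and the extra ``htop`` package are made up
+for this example; ``SparkMaster`` and ``_list_packages_to_install`` are taken
+from this plugin's source. To be usable with ``cgcloud``, such a class would
+also need to be exposed via a plugin's ``roles( )`` function::
+
+ from cgcloud.spark.spark_box import SparkMaster
+
+ class CustomSparkMaster( SparkMaster ):
+     """
+     A Spark master whose image additionally contains the htop package.
+     """
+     def _list_packages_to_install( self ):
+         return super( CustomSparkMaster, self )._list_packages_to_install( ) + [ 'htop' ]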
+
+Prerequisites
+=============
+
+The ``cgcloud-spark`` package requires that the ``cgcloud-core`` package and
+its prerequisites_ are present.
+
+.. _prerequisites: ../core#prerequisites
+
+
+Installation
+============
+
+Read the entire section before pasting any commands and ensure that all
+prerequisites are installed. It is recommended to install this plugin into the
+virtualenv you created for CGCloud::
+
+ source ~/cgcloud/bin/activate
+ pip install cgcloud-spark
+
+If you get ``DistributionNotFound: No distributions matching the version for
+cgcloud-spark``, try running ``pip install --pre cgcloud-spark``.
+
+Be sure to configure_ ``cgcloud-core`` before proceeding.
+
+.. _configure: ../core/README.rst#configuration
+
+Configuration
+=============
+
+Modify your ``.profile`` or ``.bash_profile`` by adding the following line::
+
+ export CGCLOUD_PLUGINS="cgcloud.spark:$CGCLOUD_PLUGINS"
+
+Log out and back in (or, on OS X, start a new Terminal tab or window).
+
+Verify the installation by running::
+
+ cgcloud list-roles
+
+The output should include the ``spark-box`` role.
+
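+More precisely, in addition to the roles contributed by ``cgcloud-core`` and any
+other plugins you may have enabled, the listing should contain the three roles
+defined by this plugin::
+
+ spark-box
+ spark-master
+ spark-slave
+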
+Usage
+=====
+
+Create a single ``t2.micro`` box to serve as the template for the cluster
+nodes::
+
+ cgcloud create -IT spark-box
+
+The ``I`` option stops the box once it is fully set up and takes an image (AMI)
+of it. The ``T`` option terminates the box after that.
+
+Now create a cluster by booting a master and the slaves from that AMI::
+
+ cgcloud create-cluster spark -s 2 -t m3.large
+
+This will launch a master and two slaves using the ``m3.large`` instance type.
+
+SSH into the master::
+
+ cgcloud ssh spark-master
+
+... or the first slave::
+
+ cgcloud ssh -o 0 spark-slave
+
+... or the second slave::
+
+ cgcloud ssh -o 1 spark-slave
+
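+When you no longer need the cluster, it can be torn down again; the
+``terminate-cluster`` command should do it (check ``cgcloud --help`` for the
+full list of cluster commands)::
+
+ cgcloud terminate-cluster spark
+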
diff --git a/spark/setup.cfg b/spark/setup.cfg
new file mode 100644
index 0000000..082e511
--- /dev/null
+++ b/spark/setup.cfg
@@ -0,0 +1,5 @@
+[pytest]
+# Look for any Python file; the default of test_*.py wouldn't work for us
+python_files=*.py
+# Also run doctests
+addopts = --doctest-modules
diff --git a/spark/setup.py b/spark/setup.py
new file mode 100644
index 0000000..8858412
--- /dev/null
+++ b/spark/setup.py
@@ -0,0 +1,21 @@
+from __future__ import absolute_import
+
+from setuptools import setup, find_packages
+
+from version import cgcloud_version, bd2k_python_lib_dep, fabric_dep
+
+setup( name='cgcloud-spark',
+ version=cgcloud_version,
+
+ author='Hannes Schmidt',
+ author_email='hannes at ucsc.edu',
+ url='https://github.com/BD2KGenomics/cgcloud',
+ description='Set up and manage an Apache Spark cluster in EC2',
+
+ package_dir={ '': 'src' },
+ packages=find_packages( 'src' ),
+ namespace_packages=[ 'cgcloud' ],
+ install_requires=[ bd2k_python_lib_dep,
+ 'cgcloud-lib==' + cgcloud_version,
+ 'cgcloud-core==' + cgcloud_version,
+ fabric_dep ] )
diff --git a/spark/src/cgcloud/__init__.py b/spark/src/cgcloud/__init__.py
new file mode 100644
index 0000000..ce648cf
--- /dev/null
+++ b/spark/src/cgcloud/__init__.py
@@ -0,0 +1 @@
+__import__( 'pkg_resources' ).declare_namespace( __name__ )
diff --git a/spark/src/cgcloud/spark/__init__.py b/spark/src/cgcloud/spark/__init__.py
new file mode 100644
index 0000000..dc88eae
--- /dev/null
+++ b/spark/src/cgcloud/spark/__init__.py
@@ -0,0 +1,8 @@
+def roles( ):
+ from cgcloud.spark.spark_box import SparkBox, SparkSlave, SparkMaster
+ return sorted( locals( ).values( ), key=lambda cls: cls.__name__ )
+
+
+def cluster_types( ):
+ from cgcloud.spark.spark_cluster import SparkCluster
+ return sorted( locals( ).values( ), key=lambda cls: cls.__name__ )
diff --git a/spark/src/cgcloud/spark/spark_box.py b/spark/src/cgcloud/spark/spark_box.py
new file mode 100644
index 0000000..4c6c29b
--- /dev/null
+++ b/spark/src/cgcloud/spark/spark_box.py
@@ -0,0 +1,445 @@
+import logging
+from StringIO import StringIO
+from collections import namedtuple
+
+from bd2k.util.iterables import concat
+from bd2k.util.strings import interpolate as fmt
+from fabric.context_managers import settings
+from fabric.operations import run, put
+
+from cgcloud.core.apache import ApacheSoftwareBox
+from cgcloud.core.box import fabric_task
+from cgcloud.core.cluster import ClusterBox, ClusterLeader, ClusterWorker
+from cgcloud.core.common_iam_policies import ec2_read_only_policy
+from cgcloud.core.generic_boxes import GenericUbuntuTrustyBox
+from cgcloud.core.ubuntu_box import Python27UpdateUbuntuBox
+from cgcloud.fabric.operations import sudo, remote_open, pip, sudov
+from cgcloud.lib.util import abreviated_snake_case_class_name, heredoc
+
+log = logging.getLogger( __name__ )
+
+user = 'sparkbox'
+install_dir = '/opt/sparkbox'
+log_dir = "/var/log/sparkbox"
+ephemeral_dir = '/mnt/ephemeral'
+persistent_dir = '/mnt/persistent'
+var_dir = '/var/lib/sparkbox'
+hdfs_replication = 1
+hadoop_version = '2.6.0'
+spark_version = '1.6.2'
+# The major version of Hadoop that the Spark binaries were built against
+spark_hadoop_version = '2.6'
+
+Service = namedtuple( 'Service', [
+ 'init_name',
+ 'description',
+ 'start_script',
+ 'stop_script' ] )
+
+
+def hdfs_service( name ):
+ script = '{install_dir}/hadoop/sbin/hadoop-daemon.sh {action} {name}'
+ return Service(
+ init_name='hdfs-' + name,
+ description=fmt( "Hadoop DFS {name} service" ),
+ start_script=fmt( script, action='start' ),
+ stop_script=fmt( script, action='stop' ) )
+
+
+def spark_service( name, script_suffix=None ):
+ if script_suffix is None: script_suffix = name
+ script = '{install_dir}/spark/sbin/{action}-{script_suffix}.sh'
+ return Service(
+ init_name='spark-' + name,
+ description=fmt( "Spark {name} service" ),
+ start_script=fmt( script, action='start' ),
+ stop_script=fmt( script, action='stop' ) )
+
+
+hadoop_services = dict(
+ master=[ hdfs_service( 'namenode' ), hdfs_service( 'secondarynamenode' ) ],
+ slave=[ hdfs_service( 'datanode' ) ] )
+
+spark_services = dict(
+ master=[ spark_service( 'master' ) ],
+ # FIXME: The start-slaves.sh script actually does ssh localhost on a slave, so I am not sure
+ # this is the right thing to do. OTOH, it is the only script that starts Tachyon and sets up the
+ # spark:// URL pointing at the master. We would need to duplicate some of its functionality
+ # if we wanted to eliminate the ssh call.
+ slave=[ spark_service( 'slave', 'slaves' ) ] )
+
+
+class SparkBox( ApacheSoftwareBox,
+ ClusterBox,
+ GenericUbuntuTrustyBox,
+ Python27UpdateUbuntuBox ):
+ """
+ A node in a Spark cluster; used only to create an image for master and worker boxes
+
+ Workers and the master undergo the same setup. Whether a node acts as a master or a slave is
+ determined at boot time, via user data. All slave nodes will be passed the IP of the master
+ node. This implies that the master is started first. As soon as its private IP is assigned,
+ typically seconds after the reservation has been submitted, the slaves can be started up.
+ """
+
+ @classmethod
+ def get_role_options( cls ):
+ return super( SparkBox, cls ).get_role_options( ) + [
+ cls.RoleOption( name='etc_hosts_entries',
+ type=str,
+ repr=str,
+ inherited=True,
+ help="Additional entries for /etc/hosts in the form "
+ "'foo:1.2.3.4,bar:2.3.4.5'" ) ]
+
+ def other_accounts( self ):
+ return super( SparkBox, self ).other_accounts( ) + [ user ]
+
+ def default_account( self ):
+ return user
+
+ def __init__( self, ctx ):
+ super( SparkBox, self ).__init__( ctx )
+ self.lazy_dirs = set( )
+
+ def _populate_security_group( self, group_id ):
+ return super( SparkBox, self )._populate_security_group( group_id ) + [
+ dict( ip_protocol='tcp', from_port=0, to_port=65535,
+ src_security_group_group_id=group_id ),
+ dict( ip_protocol='udp', from_port=0, to_port=65535,
+ src_security_group_group_id=group_id ) ]
+
+ def _get_iam_ec2_role( self ):
+ iam_role_name, policies = super( SparkBox, self )._get_iam_ec2_role( )
+ iam_role_name += '--' + abreviated_snake_case_class_name( SparkBox )
+ policies.update( dict(
+ ec2_read_only=ec2_read_only_policy,
+ ec2_spark_box=dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="*", Action="ec2:CreateTags" ),
+ dict( Effect="Allow", Resource="*", Action="ec2:CreateVolume" ),
+ dict( Effect="Allow", Resource="*", Action="ec2:AttachVolume" ) ] ) ) )
+ return iam_role_name, policies
+
+ @fabric_task
+ def _setup_package_repos( self ):
+ super( SparkBox, self )._setup_package_repos( )
+ sudo( 'add-apt-repository -y ppa:webupd8team/java' )
+
+ def _list_packages_to_install( self ):
+ return super( SparkBox, self )._list_packages_to_install( ) + [
+ 'oracle-java8-set-default' ]
+
+ def _get_debconf_selections( self ):
+ return super( SparkBox, self )._get_debconf_selections( ) + [
+ 'debconf shared/accepted-oracle-license-v1-1 select true',
+ 'debconf shared/accepted-oracle-license-v1-1 seen true' ]
+
+ def _pre_install_packages( self ):
+ super( SparkBox, self )._pre_install_packages( )
+ self.__setup_application_user( )
+
+ @fabric_task
+ def __setup_application_user( self ):
+ sudo( fmt( 'useradd '
+ '--home /home/{user} '
+ '--create-home '
+ '--user-group '
+ '--shell /bin/bash {user}' ) )
+
+ def _post_install_packages( self ):
+ super( SparkBox, self )._post_install_packages( )
+ self._propagate_authorized_keys( user, user )
+ self.__setup_shared_dir( )
+ self.__setup_ssh_config( )
+ self.__create_spark_keypair( )
+ self.__install_hadoop( )
+ self.__install_spark( )
+ self.__setup_path( )
+ self.__install_tools( )
+
+ def _shared_dir( self ):
+ return '/home/%s/shared' % self.default_account( )
+
+ @fabric_task
+ def __setup_shared_dir( self ):
+ sudov( 'install', '-d', self._shared_dir( ), '-m', '700', '-o', self.default_account( ) )
+
+ @fabric_task
+ def __setup_ssh_config( self ):
+ with remote_open( '/etc/ssh/ssh_config', use_sudo=True ) as f:
+ f.write( heredoc( """
+ Host spark-master
+ CheckHostIP no
+ HashKnownHosts no""" ) )
+
+ @fabric_task( user=user )
+ def __create_spark_keypair( self ):
+ self._provide_imported_keypair( ec2_keypair_name=self.__ec2_keypair_name( self.ctx ),
+ private_key_path=fmt( "/home/{user}/.ssh/id_rsa" ),
+ overwrite_ec2=True )
+ # This trick allows us to roam freely within the cluster as the app user while still
+ # being able to have keypairs in authorized_keys managed by cgcloudagent such that
+ # external users can log in as the app user, too. The trick depends on AuthorizedKeysFile
+ # defaulting to or being set to .ssh/authorized_keys and .ssh/authorized_keys2 in sshd_config.
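+ # For illustration only (nothing here changes sshd_config), such a line would look like:
+ #
+ #     AuthorizedKeysFile .ssh/authorized_keys .ssh/authorized_keys2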
+ run( "cd .ssh && cat id_rsa.pub >> authorized_keys2" )
+
+ def __ec2_keypair_name( self, ctx ):
+ return user + '@' + ctx.to_aws_name( self.role( ) )
+
+ @fabric_task
+ def __install_hadoop( self ):
+ # Download and extract Hadoop
+ path = fmt( 'hadoop/common/hadoop-{hadoop_version}/hadoop-{hadoop_version}.tar.gz' )
+ self._install_apache_package( path, install_dir )
+
+ # Add environment variables to hadoop_env.sh
+ hadoop_env = dict(
+ HADOOP_LOG_DIR=self._lazy_mkdir( log_dir, "hadoop" ),
+ JAVA_HOME='/usr/lib/jvm/java-8-oracle' )
+ hadoop_env_sh_path = fmt( "{install_dir}/hadoop/etc/hadoop/hadoop-env.sh" )
+ with remote_open( hadoop_env_sh_path, use_sudo=True ) as hadoop_env_sh:
+ hadoop_env_sh.write( '\n' )
+ for name, value in hadoop_env.iteritems( ):
+ hadoop_env_sh.write( fmt( 'export {name}="{value}"\n' ) )
+
+ # Configure HDFS
+ hdfs_dir = var_dir + "/hdfs"
+ put( use_sudo=True,
+ remote_path=fmt( '{install_dir}/hadoop/etc/hadoop/hdfs-site.xml' ),
+ local_path=StringIO( self.__to_hadoop_xml_config( {
+ 'dfs.replication': str( hdfs_replication ),
+ 'dfs.permissions': 'false',
+ 'dfs.name.dir': self._lazy_mkdir( hdfs_dir, 'name', persistent=True ),
+ 'dfs.data.dir': self._lazy_mkdir( hdfs_dir, 'data', persistent=True ),
+ 'fs.checkpoint.dir': self._lazy_mkdir( hdfs_dir, 'checkpoint', persistent=True ),
+ 'dfs.namenode.http-address': 'spark-master:50070',
+ 'dfs.namenode.secondary.http-address': 'spark-master:50090' } ) ) )
+
+ # Configure Hadoop
+ put( use_sudo=True,
+ remote_path=fmt( '{install_dir}/hadoop/etc/hadoop/core-site.xml' ),
+ local_path=StringIO( self.__to_hadoop_xml_config( {
+ 'fs.default.name': 'hdfs://spark-master:8020' } ) ) )
+
+ # Make shell auto completion easier
+ sudo( fmt( 'find {install_dir}/hadoop -name "*.cmd" | xargs rm' ) )
+
+ # Install upstart jobs
+ self.__register_upstart_jobs( hadoop_services )
+
+ @staticmethod
+ def __to_hadoop_xml_config( properties ):
+ """
+ >>> print SparkBox._SparkBox__to_hadoop_xml_config( {'foo' : 'bar'} )
+ <?xml version='1.0' encoding='utf-8'?>
+ <?xml-stylesheet type='text/xsl' href='configuration.xsl'?>
+ <configuration>
+ <property>
+ <name>foo</name>
+ <value>bar</value>
+ </property>
+ </configuration>
+ <BLANKLINE>
+ """
+ s = StringIO( )
+ s.write( heredoc( """
+ <?xml version='1.0' encoding='utf-8'?>
+ <?xml-stylesheet type='text/xsl' href='configuration.xsl'?>
+ <configuration>""" ) )
+ for name, value in properties.iteritems( ):
+ s.write( heredoc( """
+ <property>
+ <name>{name}</name>
+ <value>{value}</value>
+ </property>""", indent=' ' ) )
+ s.write( "</configuration>\n" )
+ return s.getvalue( )
+
+ @fabric_task
+ def __install_spark( self ):
+ # Download and extract Spark
+ path = fmt( 'spark/spark-{spark_version}/spark-{spark_version}-bin-hadoop{spark_hadoop_version}.tgz' )
+ self._install_apache_package( path, install_dir )
+
+ spark_dir = var_dir + "/spark"
+
+ # Add environment variables to spark_env.sh
+ spark_env_sh_path = fmt( "{install_dir}/spark/conf/spark-env.sh" )
+ sudo( fmt( "cp {spark_env_sh_path}.template {spark_env_sh_path}" ) )
+ spark_env = dict(
+ SPARK_LOG_DIR=self._lazy_mkdir( log_dir, "spark" ),
+ SPARK_WORKER_DIR=self._lazy_mkdir( spark_dir, "work" ),
+ SPARK_LOCAL_DIRS=self._lazy_mkdir( spark_dir, "local" ),
+ JAVA_HOME='/usr/lib/jvm/java-8-oracle',
+ SPARK_MASTER_IP='spark-master',
+ HADOOP_CONF_DIR=fmt( "{install_dir}/hadoop/etc/hadoop" ) )
+ with remote_open( spark_env_sh_path, use_sudo=True ) as spark_env_sh:
+ spark_env_sh.write( '\n' )
+ for name, value in spark_env.iteritems( ):
+ spark_env_sh.write( fmt( 'export {name}="{value}"\n' ) )
+
+ # Configure Spark properties
+ spark_defaults = {
+ 'spark.eventLog.enabled': 'true',
+ 'spark.eventLog.dir': self._lazy_mkdir( spark_dir, "history" ),
+ 'spark.master': 'spark://spark-master:7077'
+ }
+ spark_defaults_conf_path = fmt( "{install_dir}/spark/conf/spark-defaults.conf" )
+ sudo( fmt( "cp {spark_defaults_conf_path}.template {spark_defaults_conf_path}" ) )
+ with remote_open( spark_defaults_conf_path, use_sudo=True ) as spark_defaults_conf:
+ for name, value in spark_defaults.iteritems( ):
+ spark_defaults_conf.write( fmt( "{name}\t{value}\n" ) )
+
+ # Make shell auto completion easier
+ sudo( fmt( 'find {install_dir}/spark -name "*.cmd" | xargs rm' ) )
+
+ # Install upstart jobs
+ self.__register_upstart_jobs( spark_services )
+
+ @fabric_task
+ def __install_tools( self ):
+ """
+ Installs the spark-master-discovery init script and its companion spark-tools. The latter
+ is a Python package distribution that's included in cgcloud-spark as a resource. This is
+ in contrast to the cgcloud agent, which is a standalone distribution.
+ """
+ tools_dir = install_dir + '/tools'
+ admin = self.admin_account( )
+ sudo( fmt( 'mkdir -p {tools_dir}' ) )
+ sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
+ run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
+ run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )
+
+ with settings( forward_agent=True ):
+ with self._project_artifacts( 'spark-tools' ) as artifacts:
+ pip( use_sudo=True,
+ path=tools_dir + '/bin/pip',
+ args=concat( 'install', artifacts ) )
+ sudo( fmt( 'chown -R root:root {tools_dir}' ) )
+
+ spark_tools = "SparkTools(**%r)" % dict( user=user,
+ shared_dir=self._shared_dir( ),
+ install_dir=install_dir,
+ ephemeral_dir=ephemeral_dir,
+ persistent_dir=persistent_dir,
+ lazy_dirs=self.lazy_dirs )
+
+ self.lazy_dirs = None # make sure it can't be used anymore once we are done with it
+
+ self._register_init_script(
+ "sparkbox",
+ heredoc( """
+ description "Spark/HDFS master discovery"
+ console log
+ start on (local-filesystems and net-device-up IFACE!=lo)
+ stop on runlevel [!2345]
+ pre-start script
+ for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
+ import logging
+ logging.basicConfig( level=logging.INFO )
+ from cgcloud.spark_tools import SparkTools
+ spark_tools = {spark_tools}
+ spark_tools.start()
+ END
+ then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1
+ end script
+ post-stop script
+ {tools_dir}/bin/python2.7 - <<END
+ import logging
+ logging.basicConfig( level=logging.INFO )
+ from cgcloud.spark_tools import SparkTools
+ spark_tools = {spark_tools}
+ spark_tools.stop()
+ END
+ end script""" ) )
+
+ script_path = "/usr/local/bin/sparkbox-manage-slaves"
+ put( remote_path=script_path, use_sudo=True, local_path=StringIO( heredoc( """
+ #!{tools_dir}/bin/python2.7
+ import sys
+ import logging
+ # Prefix each log line to make it more obvious that it's the master logging when the
+ # slave calls this script via ssh.
+ logging.basicConfig( level=logging.INFO,
+ format="manage_slaves: " + logging.BASIC_FORMAT )
+ from cgcloud.spark_tools import SparkTools
+ spark_tools = {spark_tools}
+ spark_tools.manage_slaves( slaves_to_add=sys.argv[1:] )""" ) ) )
+ sudo( fmt( "chown root:root {script_path} && chmod 755 {script_path}" ) )
+
+ @fabric_task
+ def _lazy_mkdir( self, parent, name, persistent=False ):
+ """
+ _lazy_mkdir( '/foo', 'dir', True ) creates /foo/dir now and ensures that
+ /mnt/persistent/foo/dir is created and bind-mounted into /foo/dir when the box starts.
+ Likewise, _lazy_mkdir( '/foo', 'dir', False ) creates /foo/dir now and ensures that
+ /mnt/ephemeral/foo/dir is created and bind-mounted into /foo/dir when the box starts.
+
+ Note that at start-up time, /mnt/persistent may be reassigned to /mnt/ephemeral if no
+ EBS volume is mounted at /mnt/persistent.
+
+ _lazy_mkdir( '/foo', 'dir', None ) will look up an instance tag named 'persist_foo_dir'
+ when the box starts and then behave like _lazy_mkdir( '/foo', 'dir', True ) if that tag's
+ value is 'True', or like _lazy_mkdir( '/foo', 'dir', False ) if that tag's value is 'False'.
+ """
+ assert self.lazy_dirs is not None
+ assert '/' not in name
+ assert parent.startswith( '/' )
+ for location in (persistent_dir, ephemeral_dir):
+ assert location.startswith( '/' )
+ assert not location.startswith( parent ) and not parent.startswith( location )
+ logical_path = parent + '/' + name
+ sudo( 'mkdir -p "%s"' % logical_path )
+ self.lazy_dirs.add( (parent, name, persistent) )
+ return logical_path
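To make the docstring above concrete, each (parent, name, persistent) triple recorded in lazy_dirs is presumably realised at boot time by the spark-tools package with a bind mount; a rough sketch for ('/foo', 'dir', False), assuming /mnt/ephemeral is the backing store::

    mkdir -p /mnt/ephemeral/foo/dir
    mount --bind /mnt/ephemeral/foo/dir /foo/dir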
+
+ def __register_upstart_jobs( self, service_map ):
+ for node_type, services in service_map.iteritems( ):
+ start_on = "sparkbox-start-" + node_type
+ for service in services:
+ self._register_init_script(
+ service.init_name,
+ heredoc( """
+ description "{service.description}"
+ console log
+ start on {start_on}
+ stop on runlevel [016]
+ setuid {user}
+ setgid {user}
+ env USER={user}
+ pre-start exec {service.start_script}
+ post-stop exec {service.stop_script}""" ) )
+ start_on = "started " + service.init_name
+
+ @fabric_task
+ def __setup_path( self ):
+ globally = True
+ if globally:
+ with remote_open( '/etc/environment', use_sudo=True ) as f:
+ new_path = [ fmt( '{install_dir}/{package}/bin' )
+ for package in ('spark', 'hadoop') ]
+ self._patch_etc_environment( f, dirs=new_path )
+ else:
+ for _user in (user, self.admin_account( )):
+ with settings( user=_user ):
+ with remote_open( '~/.profile' ) as f:
+ f.write( '\n' )
+ for package in ('spark', 'hadoop'):
+ # We don't include sbin here because too many file names collide in
+ # Spark's and Hadoop's sbin
+ f.write( fmt( 'PATH="$PATH:{install_dir}/{package}/bin"\n' ) )
+
+
+class SparkMaster( SparkBox, ClusterLeader ):
+ """
+ The master of a cluster of boxes created from a spark-box image
+ """
+ pass
+
+
+class SparkSlave( SparkBox, ClusterWorker ):
+ """
+ A slave in a cluster of boxes created from a spark-box image
+ """
+ pass
diff --git a/spark/src/cgcloud/spark/spark_cluster.py b/spark/src/cgcloud/spark/spark_cluster.py
new file mode 100644
index 0000000..18dfdde
--- /dev/null
+++ b/spark/src/cgcloud/spark/spark_cluster.py
@@ -0,0 +1,12 @@
+from cgcloud.core.cluster import Cluster
+from cgcloud.spark.spark_box import SparkMaster, SparkSlave
+
+
+class SparkCluster( Cluster ):
+ @property
+ def worker_role( self ):
+ return SparkSlave
+
+ @property
+ def leader_role( self ):
+ return SparkMaster
diff --git a/spark/src/cgcloud/spark/test/__init__.py b/spark/src/cgcloud/spark/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/spark/src/cgcloud/spark/test/conftest.py b/spark/src/cgcloud/spark/test/conftest.py
new file mode 100644
index 0000000..862a5a2
--- /dev/null
+++ b/spark/src/cgcloud/spark/test/conftest.py
@@ -0,0 +1 @@
+from cgcloud.core.test.conftest import pytest_configure
diff --git a/spark/src/cgcloud/spark/test/test_spark.py b/spark/src/cgcloud/spark/test/test_spark.py
new file mode 100644
index 0000000..e292c51
--- /dev/null
+++ b/spark/src/cgcloud/spark/test/test_spark.py
@@ -0,0 +1,133 @@
+from inspect import getsource
+import os
+from textwrap import dedent
+import time
+import logging
+import unittest
+
+from cgcloud.core.test import CoreTestCase
+from cgcloud.spark.spark_box import install_dir, SparkBox, SparkMaster, SparkSlave
+
+log = logging.getLogger( __name__ )
+
+master = SparkMaster.role( )
+slave = SparkSlave.role( )
+node = SparkBox.role( )
+
+num_slaves = 2
+
+
+class SparkClusterTests( CoreTestCase ):
+ """
+ Covers the creation of a Spark cluster from scratch and running a simple Spark job on it.
+ Also covers persistent HDFS between two cluster incarnations.
+ """
+ cleanup = True
+ create_image = True
+
+ @classmethod
+ def setUpClass( cls ):
+ os.environ[ 'CGCLOUD_PLUGINS' ] = 'cgcloud.spark'
+ super( SparkClusterTests, cls ).setUpClass( )
+ if cls.create_image:
+ cls._cgcloud( 'create', node, '-IT' )
+
+ @classmethod
+ def tearDownClass( cls ):
+ if cls.cleanup and cls.create_image:
+ cls._cgcloud( 'delete-image', node )
+ super( SparkClusterTests, cls ).tearDownClass( )
+
+ def test_wordcount( self ):
+ self._create_cluster( )
+ try:
+ self._assert_remote_failure( master )
+ self._wait_for_slaves( )
+ self._word_count( )
+ finally:
+ if self.cleanup:
+ self._terminate_cluster( )
+
+ def test_persistence( self ):
+ volume_size_gb = 1
+ self._create_cluster( '--ebs-volume-size', str( volume_size_gb ) )
+ try:
+ try:
+ self._wait_for_slaves( )
+ # Create and checksum a random file taking up 75% of the cluster's theoretical
+ # storage capacity, assuming a replication factor of 1.
+ test_file_size_mb = volume_size_gb * 1024 * num_slaves * 3 / 4
+ self._ssh( master, 'dd if=/dev/urandom bs=1M count=%d '
+ '| tee >(md5sum > test.bin.md5) '
+ '| hdfs dfs -put -f - /test.bin' % test_file_size_mb )
+ self._ssh( master, 'hdfs dfs -put -f test.bin.md5 /' )
+ finally:
+ self._terminate_cluster( )
+ self._create_cluster( '--ebs-volume-size', str( volume_size_gb ) )
+ try:
+ self._wait_for_slaves( )
+ self._ssh( master, 'test "$(hdfs dfs -cat /test.bin.md5)" '
+ '== "$(hdfs dfs -cat /test.bin | md5sum)"' )
+ finally:
+ if self.cleanup:
+ self._terminate_cluster( )
+ finally:
+ if self.cleanup:
+ self._delete_volumes( )
+
+ def _create_cluster( self, *args ):
+ self._cgcloud( 'create-cluster', 'spark', '-t=m3.medium', '-s', str( num_slaves ), *args )
+
+ def _terminate_cluster( self ):
+ self._cgcloud( 'terminate-cluster', 'spark' )
+
+ def _wait_for_slaves( self ):
+ delay = 5
+ expiration = time.time( ) + 10 * 60
+ commands = [
+ 'test $(cat %s/spark/conf/slaves | wc -l) = %s' % (install_dir, num_slaves),
+ "hdfs dfsadmin -report -live | fgrep 'Live datanodes (%s)'" % num_slaves ]
+ for command in commands:
+ while True:
+ try:
+ self._ssh( master, command )
+ except SystemExit:
+ if time.time( ) + delay >= expiration:
+ self.fail( "Cluster didn't come up in time" )
+ time.sleep( delay )
+ else:
+ break
+
+ @unittest.skip( 'Only for interactive invocation' )
+ def test_word_count_only( self ):
+ self._word_count( )
+
+ def _word_count( self ):
+ self._ssh( master, 'hdfs dfs -rm -r -f -skipTrash /test.txt /test.txt.counts' )
+ self._ssh( master, 'rm -rf test.txt test.txt.counts' )
+ self._ssh( master, 'curl -o test.txt https://www.apache.org/licenses/LICENSE-2.0.txt' )
+ self._ssh( master, 'hdfs dfs -put -f test.txt /' )
+
+ def word_count( ):
+ # noinspection PyUnresolvedReferences
+ from pyspark import SparkContext
+ sc = SparkContext( appName='WordCount' )
+ input = sc.textFile( '/test.txt' )
+ counts = (input
+ .flatMap( lambda line: line.split( " " ) )
+ .map( lambda word: (word, 1) )
+ .reduceByKey( lambda a, b: a + b ))
+ counts.saveAsTextFile( '/test.txt.counts' )
+
+ script = 'wordcount.py'
+ body = dedent( '\n'.join( getsource( word_count ).split( '\n' )[ 1: ] ) )
+ self._send_file( master, body, script )
+
+ self._ssh( master, 'spark-submit ' + script )
+ self._ssh( master, 'hdfs dfs -get /test.txt.counts' )
+ self._ssh( master, 'test -f test.txt.counts/_SUCCESS' )
+ for i in xrange( num_slaves ):
+ self._ssh( master, 'test -s test.txt.counts/part-%05d' % i )
+
+ def _delete_volumes( self ):
+ pass
diff --git a/toil/.gitignore b/toil/.gitignore
new file mode 100644
index 0000000..0ff20ef
--- /dev/null
+++ b/toil/.gitignore
@@ -0,0 +1,6 @@
+/build
+/dist
+*.egg-info
+*.pyc
+/MANIFEST.in
+/version.py
diff --git a/toil/README.rst b/toil/README.rst
new file mode 100644
index 0000000..e81d766
--- /dev/null
+++ b/toil/README.rst
@@ -0,0 +1,77 @@
+The CGCloud plugin for Toil lets you set up a fully configured Toil/Mesos
+cluster in EC2 in just minutes, regardless of the number of nodes.
+
+
+Prerequisites
+=============
+
+The ``cgcloud-toil`` package requires that the ``cgcloud-core`` package and
+its prerequisites_ are present.
+
+.. _prerequisites: ../core#prerequisites
+
+
+Installation
+============
+
+Read the entire section before pasting any commands and ensure that all
+prerequisites are installed. It is recommended to install this plugin into the
+virtualenv you created for CGCloud::
+
+ source ~/cgcloud/bin/activate
+ pip install cgcloud-toil
+
+If you get ``DistributionNotFound: No distributions matching the version for
+cgcloud-toil``, try running ``pip install --pre cgcloud-toil``.
+
+Be sure to configure_ ``cgcloud-core`` before proceeding.
+
+.. _configure: ../core/README.rst#configuration
+
+Configuration
+=============
+
+Modify your ``.profile`` or ``.bash_profile`` by adding the following line::
+
+ export CGCLOUD_PLUGINS="cgcloud.toil:$CGCLOUD_PLUGINS"
+
+Log out and back in (or, on OS X, start a new Terminal tab/window) so that the change takes effect.
+
+Verify the installation by running::
+
+ cgcloud list-roles
+
+The output should include the ``toil-box`` role.
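Going by the role classes this plugin registers, the relevant portion of the listing should look roughly like::

    toil-box
    toil-latest-box
    toil-legacy-box
    toil-leader
    toil-worker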
+
+Usage
+=====
+
+Create a single ``t2.micro`` box to serve as the template for the cluster
+nodes::
+
+ cgcloud create -IT toil-box
+
+The ``I`` option stops the box once it is fully set up and takes an image (AMI)
+of it. The ``T`` option terminates the box after that.
+
+Substitute ``toil-latest-box`` for ``toil-box`` if you want to use the latest
+unstable release of Toil.
+
+Now create a cluster by booting a leader and the workers from that AMI::
+
+ cgcloud create-cluster toil -s 2 -t m3.large
+
+This will launch a leader and two workers using the ``m3.large`` instance type.
+
+SSH into the leader::
+
+ cgcloud ssh toil-leader
+
+... or the first worker::
+
+ cgcloud ssh -o 0 toil-worker
+
+... or the second worker::
+
+ cgcloud ssh -o 1 toil-worker
+
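The plugin's test suite also exercises two follow-up commands that are useful once the cluster is running: growing it by additional workers and tearing it down again::

    cgcloud grow-cluster toil -s 1
    cgcloud terminate-cluster toil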
diff --git a/toil/setup.py b/toil/setup.py
new file mode 100644
index 0000000..fe8041a
--- /dev/null
+++ b/toil/setup.py
@@ -0,0 +1,20 @@
+from __future__ import absolute_import
+from setuptools import setup, find_packages
+from version import cgcloud_version, bd2k_python_lib_dep, fabric_dep
+
+setup( name='cgcloud-toil',
+ version=cgcloud_version,
+
+ author='Christopher Ketchum',
+ author_email='cketchum at ucsc.edu',
+ url='https://github.com/BD2KGenomics/cgcloud',
+ description='Setup and manage a toil and Apache Mesos cluster in EC2',
+
+ package_dir={ '': 'src' },
+ packages=find_packages( 'src' ),
+ namespace_packages=[ 'cgcloud' ],
+ install_requires=[ 'cgcloud-lib==' + cgcloud_version,
+ 'cgcloud-core==' + cgcloud_version,
+ 'cgcloud-mesos==' + cgcloud_version,
+ bd2k_python_lib_dep,
+ fabric_dep ] )
diff --git a/toil/src/cgcloud/__init__.py b/toil/src/cgcloud/__init__.py
new file mode 100644
index 0000000..ce648cf
--- /dev/null
+++ b/toil/src/cgcloud/__init__.py
@@ -0,0 +1 @@
+__import__( 'pkg_resources' ).declare_namespace( __name__ )
diff --git a/toil/src/cgcloud/toil/__init__.py b/toil/src/cgcloud/toil/__init__.py
new file mode 100644
index 0000000..2c479de
--- /dev/null
+++ b/toil/src/cgcloud/toil/__init__.py
@@ -0,0 +1,12 @@
+def roles( ):
+ from cgcloud.toil.toil_box import (ToilLegacyBox,
+ ToilBox,
+ ToilLatestBox,
+ ToilLeader,
+ ToilWorker)
+ return sorted( locals( ).values( ), key=lambda cls: cls.__name__ )
+
+
+def cluster_types( ):
+ from cgcloud.toil.toil_cluster import ToilCluster
+ return sorted( locals( ).values( ), key=lambda cls: cls.__name__ )
diff --git a/toil/src/cgcloud/toil/test/__init__.py b/toil/src/cgcloud/toil/test/__init__.py
new file mode 100644
index 0000000..f34c55b
--- /dev/null
+++ b/toil/src/cgcloud/toil/test/__init__.py
@@ -0,0 +1 @@
+__author__ = 'hannes'
diff --git a/toil/src/cgcloud/toil/test/conftest.py b/toil/src/cgcloud/toil/test/conftest.py
new file mode 100644
index 0000000..862a5a2
--- /dev/null
+++ b/toil/src/cgcloud/toil/test/conftest.py
@@ -0,0 +1 @@
+from cgcloud.core.test.conftest import pytest_configure
diff --git a/toil/src/cgcloud/toil/test/test_toil.py b/toil/src/cgcloud/toil/test/test_toil.py
new file mode 100644
index 0000000..b9ac7e3
--- /dev/null
+++ b/toil/src/cgcloud/toil/test/test_toil.py
@@ -0,0 +1,169 @@
+import logging
+import os
+import tempfile
+import time
+import unittest
+from inspect import getsource
+from textwrap import dedent
+
+from bd2k.util.exceptions import panic
+
+from cgcloud.mesos.test import MesosTestCase
+from cgcloud.toil.toil_box import ToilLeader, ToilBox
+from cgcloud.toil.toil_box import ToilWorker
+
+log = logging.getLogger( __name__ )
+
+leader = ToilLeader.role( )
+worker = ToilWorker.role( )
+node = ToilBox.role( )
+
+num_workers = 2
+
+
+class ToilClusterTests( MesosTestCase ):
+ """
+ Covers the creation of a Toil cluster from scratch and running a simple Toil job that invokes
+ Docker on it.
+ """
+ cleanup = True
+ create_image = True
+
+ @classmethod
+ def setUpClass( cls ):
+ os.environ[ 'CGCLOUD_PLUGINS' ] = 'cgcloud.toil:cgcloud.mesos'
+ super( ToilClusterTests, cls ).setUpClass( )
+ if cls.create_image:
+ cls._cgcloud( 'create', node, '-IT' )
+
+ @classmethod
+ def tearDownClass( cls ):
+ if cls.cleanup and cls.create_image:
+ cls._cgcloud( 'delete-image', node )
+ super( ToilClusterTests, cls ).tearDownClass( )
+
+ def test_hello_world( self ):
+ shared_dir = self._prepare_shared_dir( )
+ self._create_cluster( 1, '--share', shared_dir )
+ try:
+ self._assert_remote_failure( leader )
+ self._wait_for_workers( )
+ self._assert_shared_dir( )
+ self._assert_s3am( )
+ self._hello_world( )
+ finally:
+ if self.cleanup:
+ self._terminate_cluster( )
+
+ @unittest.skip( 'Only for interactive invocation' )
+ def test_hello_world_only( self ):
+ self._hello_world( )
+
+ def _prepare_shared_dir( self ):
+ shared_dir = tempfile.mkdtemp( )
+ with open( os.path.join( shared_dir, 'foo' ), 'w' ) as f:
+ f.write( 'bar' )
+ # Append / so rsync transfers the contents of the directory, not the directory itself
+ shared_dir = os.path.join( shared_dir, '' )
+ return shared_dir
+
+ def _assert_shared_dir( self ):
+ command = 'test "$(cat shared/foo)" == bar'
+ self._ssh( leader, command )
+ for i in xrange( num_workers ):
+ self._ssh( worker, command, ordinal=i )
+
+ def _assert_s3am( self ):
+ self._ssh( leader, 's3am --help' )
+
+ def _create_cluster( self, growth, *args ):
+ self._cgcloud( 'create-cluster', 'toil', '-s=%d' % (num_workers - growth),
+ '--ssh-opts', self.ssh_opts_str( ), *args )
+ if growth:
+ self._cgcloud( 'grow-cluster', 'toil', '-s=%d' % growth )
+
+ def _terminate_cluster( self ):
+ self._cgcloud( 'terminate-cluster', 'toil' )
+
+ def _hello_world( self ):
+ script = 'hello_world.py'
+
+ def hello_world( ):
+ # noinspection PyUnresolvedReferences
+ from toil.job import Job
+ from subprocess import check_output
+ import os
+
+ def hello( name ):
+ assert os.environ[ 'TOIL_WORKDIR' ] == '/var/lib/toil'
+ return check_output( [ 'docker', 'run', '-e', 'FOO=' + name, 'ubuntu',
+ 'bash', '-c', 'echo -n Hello, $FOO!' ] )
+
+ if __name__ == '__main__':
+ options = Job.Runner.getDefaultArgumentParser( ).parse_args( )
+ job = Job.wrapFn( hello, "world", cores=1, memory=1e6, disk=1e6 )
+ result = Job.Runner.startToil( job, options )
+ assert result == 'Hello, world!'
+
+ body = dedent( '\n'.join( getsource( hello_world ).split( '\n' )[ 1: ] ) )
+ self._send_file( leader, body, script )
+
+ def hex64( x ):
+ return hex( int( x ) )[ 2: ].zfill( 8 )
+
+ # Could use UUID but prefer historical ordering. Time in s plus PID is sufficiently unique.
+ job_store = 'test-%s%s-toil-job-store' % (hex64( time.time( ) ), hex64( os.getpid( ) ))
+ job_store = ':'.join( ('aws', self.ctx.region, job_store) )
+ self._ssh( leader, 'toil', 'clean', job_store )
+ try:
+ self._ssh( leader, 'python2.7', script,
+ '--batchSystem=mesos',
+ '--mesosMaster=mesos-master:5050',
+ job_store )
+ except:
+ with panic( log ):
+ self._ssh( leader, 'toil', 'clean', job_store )
+
+ def test_persistence( self ):
+ # Check that /var/lib/docker is on the persistent volume and that /var/lib/toil can be
+ # switched between ephemeral and persistent. [ Would use docstring but confuses pytest ]
+ foo = '/var/lib/docker/foo'
+ bar = '/var/lib/toil/bar'
+
+ def compare_device( oper ):
+ return "test $(stat -c '%d' " + foo + ") " + oper + " $(stat -c '%d' " + bar + ")"
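For clarity, compare_device( '==' ) expands to the following remote command, which succeeds only if both files live on the same device::

    test $(stat -c '%d' /var/lib/docker/foo) == $(stat -c '%d' /var/lib/toil/bar)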
+
+ volume_size_gb = 1
+ self._create_cluster( 0, '--ebs-volume-size', str( volume_size_gb ),
+ '-O', 'persist_var_lib_toil=True' )
+ try:
+ try:
+ self._wait_for_workers( )
+ for ordinal in range( num_workers ):
+ self._ssh( worker, 'sudo touch ' + foo, admin=True, o=ordinal )
+ self._ssh( worker, 'touch ' + bar, o=ordinal )
+ # Ensure both files are on the same device (/mnt/persistent)
+ self._ssh( worker, compare_device( "==" ) )
+ finally:
+ self._terminate_cluster( )
+ self._create_cluster( 0, '--ebs-volume-size', str( volume_size_gb ),
+ '-O', 'persist_var_lib_toil=False' )
+ try:
+ self._wait_for_workers( )
+ for ordinal in range( num_workers ):
+ self._ssh( worker, 'sudo test -f ' + foo, admin=True, o=ordinal )
+ self._ssh( worker, 'touch ' + bar, o=ordinal )
+ # Ensure the files are now on different devices (only /var/lib/docker stays on /mnt/persistent)
+ self._ssh( worker, compare_device( "!=" ) )
+ finally:
+ if self.cleanup:
+ self._terminate_cluster( )
+ finally:
+ if self.cleanup:
+ self._delete_volumes( )
+
+ def _wait_for_workers( self ):
+ self._wait_for_mesos_slaves( leader, num_workers )
+
+ def _delete_volumes( self ):
+ pass
diff --git a/toil/src/cgcloud/toil/toil_box.py b/toil/src/cgcloud/toil/toil_box.py
new file mode 100644
index 0000000..5863e37
--- /dev/null
+++ b/toil/src/cgcloud/toil/toil_box.py
@@ -0,0 +1,196 @@
+import logging
+import os
+
+import re
+from abc import abstractmethod
+
+from bd2k.util import strict_bool
+from bd2k.util.iterables import concat
+from fabric.operations import put
+
+from cgcloud.core.box import fabric_task
+from cgcloud.core.cluster import ClusterBox, ClusterWorker, ClusterLeader
+from cgcloud.core.common_iam_policies import ec2_full_policy, s3_full_policy, sdb_full_policy
+from cgcloud.core.docker_box import DockerBox
+from cgcloud.core.version import s3am_dep
+from cgcloud.fabric.operations import pip, remote_sudo_popen, sudo, virtualenv
+from cgcloud.lib.util import abreviated_snake_case_class_name, heredoc, UserError
+from cgcloud.mesos.mesos_box import MesosBoxSupport, user, persistent_dir
+
+log = logging.getLogger( __name__ )
+
+
+class ToilBoxSupport( MesosBoxSupport, DockerBox, ClusterBox ):
+ """
+ A box with Mesos, Toil and their dependencies installed.
+ """
+
+ def _list_packages_to_install( self ):
+ return super( ToilBoxSupport, self )._list_packages_to_install( ) + [
+ 'python-dev', 'gcc', 'make',
+ 'libcurl4-openssl-dev', # Only for S3AM
+ 'libffi-dev' ] # pynacl -> toil, Azure client-side encryption
+
+ def _post_install_mesos( self ):
+ super( ToilBoxSupport, self )._post_install_mesos( )
+ # Override this method instead of _post_install_packages() such that this is run before
+ self.__install_toil( )
+ self.__install_s3am( )
+
+ def _docker_users( self ):
+ return super( ToilBoxSupport, self )._docker_users( ) + [ user ]
+
+ def _docker_data_prefixes( self ):
+ # We prefer Docker to be stored on the persistent volume if there is one
+ return concat( persistent_dir, super( ToilBoxSupport, self )._docker_data_prefixes( ) )
+
+ @fabric_task
+ def _setup_docker( self ):
+ super( ToilBoxSupport, self )._setup_docker( )
+ # The docker and dockerbox init jobs depend on /mnt/persistent which is set up by the
+ # mesosbox job. Adding a dependency of the docker job on mesosbox should satisfy that
+ # dependency.
+ with remote_sudo_popen( 'patch -d /etc/init' ) as patch:
+ patch.write( heredoc( """
+ --- docker.conf.orig 2015-12-18 23:28:48.693072560 +0000
+ +++ docker.conf 2015-12-18 23:40:30.553072560 +0000
+ @@ -1,6 +1,6 @@
+ description "Docker daemon"
+
+ -start on (local-filesystems and net-device-up IFACE!=lo)
+ +start on (local-filesystems and net-device-up IFACE!=lo and started mesosbox)
+ stop on runlevel [!2345]
+ limit nofile 524288 1048576
+ limit nproc 524288 1048576""" ) )
+
+ def _enable_agent_metrics( self ):
+ return True
+
+ @classmethod
+ def get_role_options( cls ):
+ return super( ToilBoxSupport, cls ).get_role_options( ) + [
+ cls.RoleOption( name='persist_var_lib_toil',
+ type=strict_bool,
+ repr=repr,
+ inherited=True,
+ help='True if /var/lib/toil should be persistent.' ) ]
+
+ def _get_iam_ec2_role( self ):
+ iam_role_name, policies = super( ToilBoxSupport, self )._get_iam_ec2_role( )
+ iam_role_name += '--' + abreviated_snake_case_class_name( ToilBoxSupport )
+ policies.update( dict(
+ toil_iam_pass_role=dict(
+ Version="2012-10-17",
+ Statement=[
+ dict( Effect="Allow", Resource=self._role_arn( ), Action="iam:PassRole" ) ] ),
+ ec2_full=ec2_full_policy,
+ s3_full=s3_full_policy,
+ sbd_full=sdb_full_policy,
+ ec2_toil_box=dict( Version="2012-10-17", Statement=[
+ dict( Effect="Allow", Resource="*", Action="ec2:CreateTags" ),
+ dict( Effect="Allow", Resource="*", Action="ec2:CreateVolume" ),
+ dict( Effect="Allow", Resource="*", Action="ec2:AttachVolume" ) ] ) ) )
+ return iam_role_name, policies
+
+ @abstractmethod
+ def _toil_pip_args( self ):
+ raise NotImplementedError()
+
+ @fabric_task
+ def __install_toil( self ):
+ # FIXME: consider using a virtualenv for Toil like we do for s3am
+ # Older versions of pip don't support the 'extra' mechanism used by Toil's setup.py
+ pip( 'install --upgrade pip', use_sudo=True )
+ pip( concat( 'install', self._toil_pip_args( ) ), use_sudo=True )
+ self._lazy_mkdir( '/var/lib', 'toil', persistent=None )
+ sudo( 'echo "TOIL_WORKDIR=/var/lib/toil" >> /etc/environment' )
+
+ @fabric_task
+ def __install_s3am( self ):
+ virtualenv( name='s3am',
+ distributions=[ s3am_dep ],
+ pip_distribution='pip==8.0.2',
+ executable='s3am' )
+
+
+class ToilLegacyBox( ToilBoxSupport ):
+ """
+ A box with Mesos, Toil 3.1.6 and their dependencies installed.
+ """
+
+ def _toil_pip_args( self ):
+ return [ 'toil[aws,mesos,encryption]==3.1.6' ]
+
+
+class ToilBox( ToilBoxSupport ):
+ """
+ A box with Mesos, the latest stable Toil release and their dependencies installed.
+ """
+
+ default_spec = 'toil[aws,mesos,encryption,cwl]==3.3.3'
+
+ @classmethod
+ def get_role_options( cls ):
+ return super( ToilBox, cls ).get_role_options( ) + [
+ cls.RoleOption( name='toil_sdists',
+ type=cls.parse_sdists,
+ repr=cls.unparse_sdists,
+ inherited=False,
+ help="A space-separated list of paths to sdists. If this option is "
+ "present, pip will be used to install the specified sdists "
+ "instead of %s. Each path may be immediately followed by a list "
+ "of extras enclosed in square brackets. The Toil sdist should "
+ "come last. An sdist is a .tar.gz file containing the source "
+ "distribution of a Python project. It is typically created by "
+ "running 'python setup.py sdist' from the project root, or, "
+ "in the case of Toil and CGCloud, running 'make sdist'. Example: "
+ "'%s'. " % (cls.default_spec, cls.unparse_sdists( [
+ ('../cgcloud-lib-1.4a1.dev0.tar.gz', ''),
+ ('dist/toil-3.2.0a2.tar.gz', '[aws,mesos,cgcloud]') ] )) ) ]
+
+ # Accepts "foo", "foo[bar]" and "foo[bar,bla]". Rejects "foo[]", "foo[bar]x"
+ sdist_re = re.compile( r'([^\[\]]+)((?:\[[^\]]+\])?)$' )
+
+ @classmethod
+ def parse_sdists( cls, s ):
+ try:
+ return [ cls.sdist_re.match( sdist ).groups( ) for sdist in s.split( ) ]
+ except:
+ raise UserError( "'%s' is not a valid value for the toil_sdists option." % s )
+
+ @classmethod
+ def unparse_sdists( cls, sdists ):
+ return ' '.join( path + extra for path, extra in sdists )
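As a quick illustration of the two helpers above (an interactive sketch using the paths from the option's help text)::

    >>> ToilBox.parse_sdists( 'dist/toil-3.2.0a2.tar.gz[aws,mesos] ../cgcloud-lib-1.4a1.dev0.tar.gz' )
    [('dist/toil-3.2.0a2.tar.gz', '[aws,mesos]'), ('../cgcloud-lib-1.4a1.dev0.tar.gz', '')]
    >>> ToilBox.unparse_sdists( _ )
    'dist/toil-3.2.0a2.tar.gz[aws,mesos] ../cgcloud-lib-1.4a1.dev0.tar.gz'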
+
+ @fabric_task
+ def _toil_pip_args( self ):
+ sdists = self.role_options.get( 'toil_sdists' )
+ if sdists:
+ result = [ ]
+ for path, extra in sdists:
+ put( local_path=path )
+ result.append( os.path.basename( path ) + extra )
+ return result
+ else:
+ return [ '--pre', self.default_spec ]
+
+
+class ToilLatestBox( ToilBox ):
+ """
+ A box with Mesos, the latest unstable release of Toil and their dependencies installed
+ """
+ default_spec = 'toil[aws,mesos,encryption,cwl]<=3.5.0'
+
+
+class ToilLeader( ToilBox, ClusterLeader ):
+ """
+ Leader of a cluster of boxes booted from a toil-box, toil-latest-box or toil-legacy-box image
+ """
+ pass
+
+
+class ToilWorker( ToilBox, ClusterWorker ):
+ """
+ Worker in a cluster of boxes booted from a toil-box, toil-latest-box or toil-legacy-box image
+ """
+ pass
diff --git a/toil/src/cgcloud/toil/toil_cluster.py b/toil/src/cgcloud/toil/toil_cluster.py
new file mode 100644
index 0000000..5366680
--- /dev/null
+++ b/toil/src/cgcloud/toil/toil_cluster.py
@@ -0,0 +1,12 @@
+from cgcloud.core.cluster import Cluster
+from cgcloud.toil.toil_box import ToilLeader, ToilWorker
+
+
+class ToilCluster( Cluster ):
+ @property
+ def worker_role( self ):
+ return ToilWorker
+
+ @property
+ def leader_role( self ):
+ return ToilLeader
diff --git a/version.py b/version.py
new file mode 100644
index 0000000..4dd03aa
--- /dev/null
+++ b/version.py
@@ -0,0 +1,21 @@
+cgcloud_version = '1.6.0'
+bd2k_python_lib_dep = 'bd2k-python-lib>=1.14a1.dev37'
+boto_dep = 'boto==2.38.0'
+fabric_dep = 'Fabric==1.10.3'
+s3am_dep = 's3am==2.0a1.dev105'
+
+
+def main( ):
+ import os
+ from pkg_resources import parse_version
+ is_release_build = not parse_version( cgcloud_version ).is_prerelease
+ suffix = '' if is_release_build else '.dev' + os.environ.get( 'BUILD_NUMBER', '0' )
+ for name, value in globals( ).items( ):
+ if name.startswith( 'cgcloud_' ):
+ value += suffix
+ if name.split( '_' )[ -1 ] in ('dep', 'version'):
+ print "%s='%s'" % (name, value)
+
+
+if __name__ == '__main__':
+ main( )
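When run as a script, main() prints shell-style assignments for every dependency and version defined above; for this release version (no prerelease suffix, so BUILD_NUMBER is ignored) the output consists of lines like the following, in no particular order::

    cgcloud_version='1.6.0'
    bd2k_python_lib_dep='bd2k-python-lib>=1.14a1.dev37'
    boto_dep='boto==2.38.0'
    fabric_dep='Fabric==1.10.3'
    s3am_dep='s3am==2.0a1.dev105'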
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-cgcloud.git
More information about the debian-med-commit mailing list