[Python-modules-commits] [python-pex] 01/07: Import python-pex_1.1.2.orig.tar.gz
Barry Warsaw
barry at moszumanska.debian.org
Mon Feb 22 20:58:38 UTC 2016
This is an automated email from the git hooks/post-receive script.
barry pushed a commit to branch master
in repository python-pex.
commit ff2c3c04241dc688dc9dfdfd85898d587aa4253c
Author: Barry Warsaw <barry at python.org>
Date: Mon Feb 22 15:50:10 2016 -0500
Import python-pex_1.1.2.orig.tar.gz
---
CHANGES.rst | 26 ++++++++++++++++++
PKG-INFO | 28 ++++++++++++++++++-
pex.egg-info/PKG-INFO | 28 ++++++++++++++++++-
pex.egg-info/requires.txt | 2 +-
pex/crawler.py | 41 +++++++++++++++++++++++++---
pex/pex_bootstrapper.py | 3 +--
pex/resolver.py | 3 ++-
pex/variables.py | 2 +-
pex/version.py | 6 ++---
scripts/coverage.sh | 2 +-
tests/test_crawler.py | 68 ++++++++++++++++++++++++++++++++++++++++++++---
tests/test_integration.py | 17 +++++++++++-
12 files changed, 207 insertions(+), 19 deletions(-)
diff --git a/CHANGES.rst b/CHANGES.rst
index 4b97c9a..fa242f8 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -3,6 +3,32 @@ CHANGES
=======
-----
+1.1.2
+-----
+
+* Bump setuptools & wheel version pinning.
+ `#194 <https://github.com/pantsbuild/pex/pull/194>`_
+
+* Unescape html in PageParser.href_match_to_url.
+ `#191 <https://github.com/pantsbuild/pex/pull/191>`_
+
+* Memoize calls to Crawler.crawl() for performance win in find-links based resolution.
+ `#187 <https://github.com/pantsbuild/pex/pull/187>`_
+
+-----
+1.1.1
+-----
+
+* Fix infinite recursion when ``PEX_PYTHON`` points at a symlink.
+ `#182 <https://github.com/pantsbuild/pex/pull/182>`_
+
+* Add ``/etc/pexrc`` to the list of pexrc locations to check.
+ `#183 <https://github.com/pantsbuild/pex/pull/183>`_
+
+* Improve error messaging for platform constrained Untranslateable errors.
+ `#179 <https://github.com/pantsbuild/pex/pull/179>`_
+
+-----
1.1.0
-----
diff --git a/PKG-INFO b/PKG-INFO
index 09fe0fd..c236fe2 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: pex
-Version: 1.1.0
+Version: 1.1.2
Summary: The PEX packaging toolchain.
Home-page: https://github.com/pantsbuild/pex
Author: UNKNOWN
@@ -11,6 +11,32 @@ Description: =======
=======
-----
+ 1.1.2
+ -----
+
+ * Bump setuptools & wheel version pinning.
+ `#194 <https://github.com/pantsbuild/pex/pull/194>`_
+
+ * Unescape html in PageParser.href_match_to_url.
+ `#191 <https://github.com/pantsbuild/pex/pull/191>`_
+
+ * Memoize calls to Crawler.crawl() for performance win in find-links based resolution.
+ `#187 <https://github.com/pantsbuild/pex/pull/187>`_
+
+ -----
+ 1.1.1
+ -----
+
+ * Fix infinite recursion when ``PEX_PYTHON`` points at a symlink.
+ `#182 <https://github.com/pantsbuild/pex/pull/182>`_
+
+ * Add ``/etc/pexrc`` to the list of pexrc locations to check.
+ `#183 <https://github.com/pantsbuild/pex/pull/183>`_
+
+ * Improve error messaging for platform constrained Untranslateable errors.
+ `#179 <https://github.com/pantsbuild/pex/pull/179>`_
+
+ -----
1.1.0
-----
diff --git a/pex.egg-info/PKG-INFO b/pex.egg-info/PKG-INFO
index 09fe0fd..c236fe2 100644
--- a/pex.egg-info/PKG-INFO
+++ b/pex.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: pex
-Version: 1.1.0
+Version: 1.1.2
Summary: The PEX packaging toolchain.
Home-page: https://github.com/pantsbuild/pex
Author: UNKNOWN
@@ -11,6 +11,32 @@ Description: =======
=======
-----
+ 1.1.2
+ -----
+
+ * Bump setuptools & wheel version pinning.
+ `#194 <https://github.com/pantsbuild/pex/pull/194>`_
+
+ * Unescape html in PageParser.href_match_to_url.
+ `#191 <https://github.com/pantsbuild/pex/pull/191>`_
+
+ * Memoize calls to Crawler.crawl() for performance win in find-links based resolution.
+ `#187 <https://github.com/pantsbuild/pex/pull/187>`_
+
+ -----
+ 1.1.1
+ -----
+
+ * Fix infinite recursion when ``PEX_PYTHON`` points at a symlink.
+ `#182 <https://github.com/pantsbuild/pex/pull/182>`_
+
+ * Add ``/etc/pexrc`` to the list of pexrc locations to check.
+ `#183 <https://github.com/pantsbuild/pex/pull/183>`_
+
+ * Improve error messaging for platform constrained Untranslateable errors.
+ `#179 <https://github.com/pantsbuild/pex/pull/179>`_
+
+ -----
1.1.0
-----
diff --git a/pex.egg-info/requires.txt b/pex.egg-info/requires.txt
index 00e3615..4ea40c9 100644
--- a/pex.egg-info/requires.txt
+++ b/pex.egg-info/requires.txt
@@ -1 +1 @@
-setuptools>=2.2,<16
+setuptools>=2.2,<20
diff --git a/pex/crawler.py b/pex/crawler.py
index e61e540..00b70fb 100644
--- a/pex/crawler.py
+++ b/pex/crawler.py
@@ -12,6 +12,7 @@ from .compatibility import PY3
from .http import Context
from .link import Link
from .tracer import TRACER
+from .util import Memoizer
if PY3:
from queue import Empty, Queue
@@ -21,6 +22,15 @@ else:
from urlparse import urlparse
+def unescape(s):
+ """Unescapes html. Taken from https://wiki.python.org/moin/EscapingHtml"""
+ s = s.replace("&lt;", "<")
+ s = s.replace("&gt;", ">")
+ # this has to be last:
+ s = s.replace("&amp;", "&")
+ return s
+
+
class PageParser(object):
"""A helper class to extract and differentiate ordinary and download links from webpages."""
@@ -33,7 +43,7 @@ class PageParser(object):
def href_match_to_url(cls, match):
def pick(group):
return '' if group is None else group
- return pick(match.group(1)) or pick(match.group(2)) or pick(match.group(3))
+ return unescape(pick(match.group(1)) or pick(match.group(2)) or pick(match.group(3)))
@classmethod
def rel_links(cls, page):
@@ -64,6 +74,14 @@ def partition(L, pred):
class Crawler(object):
"""A multi-threaded crawler that supports local (disk) and remote (web) crawling."""
+ # Memoizer for calls to Crawler.crawl().
+ _CRAWL_CACHE = Memoizer()
+
+ @classmethod
+ def reset_cache(cls):
+ """Reset the internal crawl cache. This is intended primarily for tests."""
+ cls._CRAWL_CACHE = Memoizer()
+
@classmethod
def crawl_local(cls, link):
try:
@@ -99,7 +117,22 @@ class Crawler(object):
self._threads = threads
self.context = context or Context.get()
+ def _make_cache_key(self, links, follow_links):
+ return (follow_links,) + tuple(links)
+
def crawl(self, link_or_links, follow_links=False):
+ links = list(Link.wrap_iterable(link_or_links))
+ cache_key = self._make_cache_key(links, follow_links)
+
+ # Memoize crawling to a global Memoizer (Crawler._CRAWL_CACHE).
+ result = self._CRAWL_CACHE.get(cache_key)
+ if result is None:
+ result = self._crawl(links, follow_links)
+ self._CRAWL_CACHE.store(cache_key, result)
+
+ return result
+
+ def _crawl(self, link_or_links, follow_links):
links, seen = set(), set()
queue = Queue()
converged = threading.Event()
@@ -127,7 +160,8 @@ class Crawler(object):
queue.put(rel)
queue.task_done()
- for link in Link.wrap_iterable(link_or_links):
+ for i, link in enumerate(link_or_links):
+ TRACER.log('crawling link i=%s link=%s follow_links=%s' % (i, link, follow_links), V=3)
queue.put(link)
workers = []
@@ -140,6 +174,5 @@ class Crawler(object):
queue.join()
converged.set()
- # We deliberately not join back the worker threads, since they are no longer of
- # any use to us.
+ # We deliberately do not join the worker threads, since they are no longer of any use to us.
return links
diff --git a/pex/pex_bootstrapper.py b/pex/pex_bootstrapper.py
index 586c788..137757f 100644
--- a/pex/pex_bootstrapper.py
+++ b/pex/pex_bootstrapper.py
@@ -82,8 +82,7 @@ def maybe_reexec_pex():
target = find_in_path(target_python)
if not target:
die('Failed to find interpreter specified by PEX_PYTHON: %s' % target)
- current = os.path.realpath(sys.executable)
- if os.path.exists(target) and target != current:
+ if os.path.exists(target) and os.path.realpath(target) != os.path.realpath(sys.executable):
TRACER.log('Detected PEX_PYTHON, re-exec to %s' % target)
ENV.delete('PEX_PYTHON')
os.execve(target, [target_python] + sys.argv, ENV.copy())
diff --git a/pex/resolver.py b/pex/resolver.py
index fe45fa6..b65fcc1 100644
--- a/pex/resolver.py
+++ b/pex/resolver.py
@@ -167,7 +167,8 @@ class Resolver(object):
if dist is None:
raise Untranslateable('Package %s is not translateable by %s' % (package, translator))
if not distribution_compatible(dist, self._interpreter, self._platform):
- raise Untranslateable('Could not get distribution for %s on appropriate platform.' % package)
+ raise Untranslateable(
+ 'Could not get distribution for %s on platform %s.' % (package, self._platform))
return dist
def resolve(self, resolvables, resolvable_set=None):
diff --git a/pex/variables.py b/pex/variables.py
index 51328b1..5075f36 100644
--- a/pex/variables.py
+++ b/pex/variables.py
@@ -52,7 +52,7 @@ class Variables(object):
def _from_rc(self, rc):
ret_vars = {}
- for filename in [rc, os.path.join(os.path.dirname(sys.argv[0]), '.pexrc')]:
+ for filename in ['/etc/pexrc', rc, os.path.join(os.path.dirname(sys.argv[0]), '.pexrc')]:
try:
with open(os.path.expanduser(filename)) as fh:
rc_items = map(self._get_kv, fh)
diff --git a/pex/version.py b/pex/version.py
index 395b068..51862fa 100644
--- a/pex/version.py
+++ b/pex/version.py
@@ -1,7 +1,7 @@
# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
-__version__ = '1.1.0'
+__version__ = '1.1.2'
-SETUPTOOLS_REQUIREMENT = 'setuptools>=2.2,<16'
-WHEEL_REQUIREMENT = 'wheel>=0.24.0,<0.25.0'
+SETUPTOOLS_REQUIREMENT = 'setuptools>=2.2,<20'
+WHEEL_REQUIREMENT = 'wheel>=0.24.0,<0.27.0'
diff --git a/scripts/coverage.sh b/scripts/coverage.sh
index 095b7a5..9a58e11 100755
--- a/scripts/coverage.sh
+++ b/scripts/coverage.sh
@@ -4,4 +4,4 @@ coverage run -p -m py.test tests
coverage run -p -m pex.bin.pex -v --help >&/dev/null
coverage run -p -m pex.bin.pex -v -- scripts/do_nothing.py
coverage run -p -m pex.bin.pex -v requests -- scripts/do_nothing.py
-coverage run -p -m pex.bin.pex -v . 'setuptools>=2.2,<16' -- scripts/do_nothing.py
+coverage run -p -m pex.bin.pex -v . 'setuptools>=2.2,<20' -- scripts/do_nothing.py
diff --git a/tests/test_crawler.py b/tests/test_crawler.py
index 470c429..3fc44eb 100644
--- a/tests/test_crawler.py
+++ b/tests/test_crawler.py
@@ -6,8 +6,14 @@ import os
from twitter.common.contextutil import temporary_dir
from pex.crawler import Crawler, PageParser
+from pex.http import Context
from pex.link import Link
+try:
+ from unittest import mock
+except ImportError:
+ import mock
+
def lpp(page):
links = PageParser.links(page)
@@ -40,6 +46,12 @@ def test_page_parser_basic():
assert lpp("<a href='stuff'> <a href=%s>" % target) == (['stuff', href], [])
+def test_page_parser_escaped_html():
+ url = 'url?param1=val&param2=val2'
+ link = 'a href="%s"' % url.replace('&', '&amp;')
+ assert lpp(link) == ([url], [])
+
+
def test_page_parser_rels():
VALID_RELS = tuple(PageParser.REL_TYPES)
for rel in VALID_RELS + ('', ' ', 'blah'):
@@ -100,6 +112,56 @@ def test_crawler_unknown_scheme():
Crawler().crawl('ftp://ftp.cdrom.com') == (set(), set())
-# TODO(wickman)
-# test remote http crawling via mock
-# test page decoding via mock
+MOCK_INDEX_TMPL = '''
+<h1>Index of /home/third_party/python</h1>
+<table>
+<tr>
+ <td valign="top"><img src="/icons/back.gif" alt="[DIR]"></td>
+ <td> </td>
+ <td align="right"> - </td>
+ <td> </td>
+</tr>
+%s
+</table>
+'''
+
+MOCK_INDEX_A = MOCK_INDEX_TMPL % '''
+<tr>
+ <td valign="top"><img src="/icons/compressed.gif" alt="[ ]"></td>
+ <td><a href="3to2-1.0.tar.gz">3to2-1.0.tar.gz</a></td>
+ <td align="right">16-Apr-2015 23:18 </td>
+ <td align="right"> 45K</td>
+ <td>GZIP compressed docume></td>
+</tr>
+'''
+
+MOCK_INDEX_B = MOCK_INDEX_TMPL % '''
+<tr>
+ <td valign="top"><img src="/icons/compressed.gif" alt="[ ]"></td>
+ <td>
+ <a href="APScheduler-2.1.0.tar.gz">APScheduler-2.1.0.tar.gz</a>
+ </td>
+ <td align="right">16-Apr-2015 23:18 </td>
+ <td align="right"> 41K</td>
+ <td>GZIP compressed docume></td>
+</tr>
+'''
+
+
+def test_crawler_remote():
+ Crawler.reset_cache()
+
+ mock_context = mock.create_autospec(Context, spec_set=True)
+ mock_context.content.side_effect = [MOCK_INDEX_A, MOCK_INDEX_B, Exception('shouldnt get here')]
+ expected_output = set([Link('http://url1.test.com/3to2-1.0.tar.gz'),
+ Link('http://url2.test.com/APScheduler-2.1.0.tar.gz')])
+
+ c = Crawler(mock_context)
+ test_links = [Link('http://url1.test.com'), Link('http://url2.test.com')]
+ assert c.crawl(test_links) == expected_output
+
+ # Test memoization of Crawler.crawl().
+ assert c.crawl(test_links) == expected_output
+
+
+# TODO(wickman): test page decoding via mock
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 4ec03ab..f53b8f5 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -2,8 +2,9 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).
import os
+import sys
-from twitter.common.contextutil import temporary_file
+from twitter.common.contextutil import environment_as, temporary_dir, temporary_file
from pex.testing import run_simple_pex_test
@@ -30,3 +31,17 @@ def test_pex_interpreter():
so, rc = run_simple_pex_test("", args=(fp.name,), coverage=True, env=env)
assert so == b'Hello world\n'
assert rc == 0
+
+
+def test_pex_python_symlink():
+ with temporary_dir() as td:
+ with environment_as(HOME=td):
+ symlink_path = os.path.join(td, 'python-symlink')
+ os.symlink(sys.executable, symlink_path)
+ pexrc_path = os.path.join(td, '.pexrc')
+ with open(pexrc_path, 'w') as pexrc:
+ pexrc.write("PEX_PYTHON=%s" % symlink_path)
+
+ body = "print('Hello')"
+ _, rc = run_simple_pex_test(body, coverage=True)
+ assert rc == 0
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-pex.git
More information about the Python-modules-commits
mailing list