[Python-modules-commits] [python-pex] 01/07: Import python-pex_1.1.2.orig.tar.gz

Barry Warsaw barry at moszumanska.debian.org
Mon Feb 22 20:58:38 UTC 2016


This is an automated email from the git hooks/post-receive script.

barry pushed a commit to branch master
in repository python-pex.

commit ff2c3c04241dc688dc9dfdfd85898d587aa4253c
Author: Barry Warsaw <barry at python.org>
Date:   Mon Feb 22 15:50:10 2016 -0500

    Import python-pex_1.1.2.orig.tar.gz
---
 CHANGES.rst               | 26 ++++++++++++++++++
 PKG-INFO                  | 28 ++++++++++++++++++-
 pex.egg-info/PKG-INFO     | 28 ++++++++++++++++++-
 pex.egg-info/requires.txt |  2 +-
 pex/crawler.py            | 41 +++++++++++++++++++++++++---
 pex/pex_bootstrapper.py   |  3 +--
 pex/resolver.py           |  3 ++-
 pex/variables.py          |  2 +-
 pex/version.py            |  6 ++---
 scripts/coverage.sh       |  2 +-
 tests/test_crawler.py     | 68 ++++++++++++++++++++++++++++++++++++++++++++---
 tests/test_integration.py | 17 +++++++++++-
 12 files changed, 207 insertions(+), 19 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 4b97c9a..fa242f8 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -3,6 +3,32 @@ CHANGES
 =======
 
 -----
+1.1.2
+-----
+
+* Bump setuptools & wheel version pinning.
+  `#194 <https://github.com/pantsbuild/pex/pull/194>`_
+
+* Unescape html in PageParser.href_match_to_url.
+  `#191 <https://github.com/pantsbuild/pex/pull/191>`_
+
+* Memoize calls to Crawler.crawl() for performance win in find-links based resolution.
+  `#187 <https://github.com/pantsbuild/pex/pull/187>`_
+
+-----
+1.1.1
+-----
+
+* Fix infinite recursion when ``PEX_PYTHON`` points at a symlink.
+  `#182 <https://github.com/pantsbuild/pex/pull/182>`_
+
+* Add ``/etc/pexrc`` to the list of pexrc locations to check.
+  `#183 <https://github.com/pantsbuild/pex/pull/183>`_
+
+* Improve error messaging for platform constrained Untranslateable errors.
+  `#179 <https://github.com/pantsbuild/pex/pull/179>`_
+
+-----
 1.1.0
 -----
 
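For context on the html-unescaping change listed above (#191): index pages escape '&' in href attributes as '&amp;', so URLs extracted by the crawler must be decoded before use. A minimal stand-alone sketch of the same operation using only the standard library (the crawler diff further below hand-rolls the three replacements instead):

  try:
    from html import unescape  # Python 3.4+
  except ImportError:
    from HTMLParser import HTMLParser  # Python 2
    unescape = HTMLParser().unescape

  # '&amp;' in a crawled href decodes back to a literal '&'
  assert unescape('url?param1=val&amp;param2=val2') == 'url?param1=val&param2=val2'
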
diff --git a/PKG-INFO b/PKG-INFO
index 09fe0fd..c236fe2 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: pex
-Version: 1.1.0
+Version: 1.1.2
 Summary: The PEX packaging toolchain.
 Home-page: https://github.com/pantsbuild/pex
 Author: UNKNOWN
@@ -11,6 +11,32 @@ Description: =======
         =======
         
         -----
+        1.1.2
+        -----
+        
+        * Bump setuptools & wheel version pinning.
+          `#194 <https://github.com/pantsbuild/pex/pull/194>`_
+        
+        * Unescape html in PageParser.href_match_to_url.
+          `#191 <https://github.com/pantsbuild/pex/pull/191>`_
+        
+        * Memoize calls to Crawler.crawl() for performance win in find-links based resolution.
+          `#187 <https://github.com/pantsbuild/pex/pull/187>`_
+        
+        -----
+        1.1.1
+        -----
+        
+        * Fix infinite recursion when ``PEX_PYTHON`` points at a symlink.
+          `#182 <https://github.com/pantsbuild/pex/pull/182>`_
+        
+        * Add ``/etc/pexrc`` to the list of pexrc locations to check.
+          `#183 <https://github.com/pantsbuild/pex/pull/183>`_
+        
+        * Improve error messaging for platform constrained Untranslateable errors.
+          `#179 <https://github.com/pantsbuild/pex/pull/179>`_
+        
+        -----
         1.1.0
         -----
         
diff --git a/pex.egg-info/PKG-INFO b/pex.egg-info/PKG-INFO
index 09fe0fd..c236fe2 100644
--- a/pex.egg-info/PKG-INFO
+++ b/pex.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: pex
-Version: 1.1.0
+Version: 1.1.2
 Summary: The PEX packaging toolchain.
 Home-page: https://github.com/pantsbuild/pex
 Author: UNKNOWN
@@ -11,6 +11,32 @@ Description: =======
         =======
         
         -----
+        1.1.2
+        -----
+        
+        * Bump setuptools & wheel version pinning.
+          `#194 <https://github.com/pantsbuild/pex/pull/194>`_
+        
+        * Unescape html in PageParser.href_match_to_url.
+          `#191 <https://github.com/pantsbuild/pex/pull/191>`_
+        
+        * Memoize calls to Crawler.crawl() for performance win in find-links based resolution.
+          `#187 <https://github.com/pantsbuild/pex/pull/187>`_
+        
+        -----
+        1.1.1
+        -----
+        
+        * Fix infinite recursion when ``PEX_PYTHON`` points at a symlink.
+          `#182 <https://github.com/pantsbuild/pex/pull/182>`_
+        
+        * Add ``/etc/pexrc`` to the list of pexrc locations to check.
+          `#183 <https://github.com/pantsbuild/pex/pull/183>`_
+        
+        * Improve error messaging for platform constrained Untranslateable errors.
+          `#179 <https://github.com/pantsbuild/pex/pull/179>`_
+        
+        -----
         1.1.0
         -----
         
diff --git a/pex.egg-info/requires.txt b/pex.egg-info/requires.txt
index 00e3615..4ea40c9 100644
--- a/pex.egg-info/requires.txt
+++ b/pex.egg-info/requires.txt
@@ -1 +1 @@
-setuptools>=2.2,<16
+setuptools>=2.2,<20
diff --git a/pex/crawler.py b/pex/crawler.py
index e61e540..00b70fb 100644
--- a/pex/crawler.py
+++ b/pex/crawler.py
@@ -12,6 +12,7 @@ from .compatibility import PY3
 from .http import Context
 from .link import Link
 from .tracer import TRACER
+from .util import Memoizer
 
 if PY3:
   from queue import Empty, Queue
@@ -21,6 +22,15 @@ else:
   from urlparse import urlparse
 
 
+def unescape(s):
+  """Unescapes html. Taken from https://wiki.python.org/moin/EscapingHtml"""
+  s = s.replace("&lt;", "<")
+  s = s.replace("&gt;", ">")
+  # this has to be last:
+  s = s.replace("&amp;", "&")
+  return s
+
+
 class PageParser(object):
   """A helper class to extract and differentiate ordinary and download links from webpages."""
 
@@ -33,7 +43,7 @@ class PageParser(object):
   def href_match_to_url(cls, match):
     def pick(group):
       return '' if group is None else group
-    return pick(match.group(1)) or pick(match.group(2)) or pick(match.group(3))
+    return unescape(pick(match.group(1)) or pick(match.group(2)) or pick(match.group(3)))
 
   @classmethod
   def rel_links(cls, page):
@@ -64,6 +74,14 @@ def partition(L, pred):
 class Crawler(object):
   """A multi-threaded crawler that supports local (disk) and remote (web) crawling."""
 
+  # Memoizer for calls to Crawler.crawl().
+  _CRAWL_CACHE = Memoizer()
+
+  @classmethod
+  def reset_cache(cls):
+    """Reset the internal crawl cache. This is intended primarily for tests."""
+    cls._CRAWL_CACHE = Memoizer()
+
   @classmethod
   def crawl_local(cls, link):
     try:
@@ -99,7 +117,22 @@ class Crawler(object):
     self._threads = threads
     self.context = context or Context.get()
 
+  def _make_cache_key(self, links, follow_links):
+    return (follow_links,) + tuple(links)
+
   def crawl(self, link_or_links, follow_links=False):
+    links = list(Link.wrap_iterable(link_or_links))
+    cache_key = self._make_cache_key(links, follow_links)
+
+    # Memoize crawling to a global Memoizer (Crawler._CRAWL_CACHE).
+    result = self._CRAWL_CACHE.get(cache_key)
+    if result is None:
+      result = self._crawl(links, follow_links)
+      self._CRAWL_CACHE.store(cache_key, result)
+
+    return result
+
+  def _crawl(self, link_or_links, follow_links):
     links, seen = set(), set()
     queue = Queue()
     converged = threading.Event()
@@ -127,7 +160,8 @@ class Crawler(object):
                 queue.put(rel)
         queue.task_done()
 
-    for link in Link.wrap_iterable(link_or_links):
+    for i, link in enumerate(link_or_links):
+      TRACER.log('crawling link i=%s link=%s follow_links=%s' % (i, link, follow_links), V=3)
       queue.put(link)
 
     workers = []
@@ -140,6 +174,5 @@ class Crawler(object):
     queue.join()
     converged.set()
 
-    # We deliberately not join back the worker threads, since they are no longer of
-    # any use to us.
+    # We deliberately do not join the worker threads, since they are no longer of any use to us.
     return links
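
The Memoizer imported from pex/util.py above is used as a thread-safe get/store cache. Assuming only that interface — this is a sketch, not the real implementation in pex/util.py:

  import threading

  class MemoizerSketch(object):
    """Minimal stand-in for pex.util.Memoizer: a lock-guarded get/store map."""

    def __init__(self):
      self._data = {}
      self._lock = threading.RLock()

    def get(self, key, default=None):
      with self._lock:
        return self._data.get(key, default)

    def store(self, key, value):
      with self._lock:
        self._data[key] = value

Because _make_cache_key returns (follow_links,) + tuple(links), repeated crawls of the same link list with the same follow_links flag share a single result; that is the find-links performance win called out in the 1.1.2 changelog.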
diff --git a/pex/pex_bootstrapper.py b/pex/pex_bootstrapper.py
index 586c788..137757f 100644
--- a/pex/pex_bootstrapper.py
+++ b/pex/pex_bootstrapper.py
@@ -82,8 +82,7 @@ def maybe_reexec_pex():
   target = find_in_path(target_python)
   if not target:
     die('Failed to find interpreter specified by PEX_PYTHON: %s' % target)
-  current = os.path.realpath(sys.executable)
-  if os.path.exists(target) and target != current:
+  if os.path.exists(target) and os.path.realpath(target) != os.path.realpath(sys.executable):
     TRACER.log('Detected PEX_PYTHON, re-exec to %s' % target)
     ENV.delete('PEX_PYTHON')
     os.execve(target, [target_python] + sys.argv, ENV.copy())
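
This one-line change is the fix for #182: previously only sys.executable was canonicalized, so a PEX_PYTHON pointing at a symlink of the current interpreter never compared equal to it and triggered a re-exec every time. A hypothetical stand-alone demonstration of the two comparisons (the temp dir and symlink name are illustrative, and os.symlink assumes a POSIX system):

  import os
  import sys
  import tempfile

  tmp = tempfile.mkdtemp()
  link = os.path.join(tmp, 'python-symlink')  # hypothetical symlink to the running interpreter
  os.symlink(sys.executable, link)

  # Old comparison: the symlink path never equals the canonicalized executable,
  # so the bootstrapper re-exec'd even when already on the requested interpreter.
  assert link != os.path.realpath(sys.executable)

  # New comparison: canonicalizing both sides makes them compare equal, so no re-exec.
  assert os.path.realpath(link) == os.path.realpath(sys.executable)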
diff --git a/pex/resolver.py b/pex/resolver.py
index fe45fa6..b65fcc1 100644
--- a/pex/resolver.py
+++ b/pex/resolver.py
@@ -167,7 +167,8 @@ class Resolver(object):
     if dist is None:
       raise Untranslateable('Package %s is not translateable by %s' % (package, translator))
     if not distribution_compatible(dist, self._interpreter, self._platform):
-      raise Untranslateable('Could not get distribution for %s on appropriate platform.' % package)
+      raise Untranslateable(
+        'Could not get distribution for %s on platform %s.' % (package, self._platform))
     return dist
 
   def resolve(self, resolvables, resolvable_set=None):
diff --git a/pex/variables.py b/pex/variables.py
index 51328b1..5075f36 100644
--- a/pex/variables.py
+++ b/pex/variables.py
@@ -52,7 +52,7 @@ class Variables(object):
 
   def _from_rc(self, rc):
     ret_vars = {}
-    for filename in [rc, os.path.join(os.path.dirname(sys.argv[0]), '.pexrc')]:
+    for filename in ['/etc/pexrc', rc, os.path.join(os.path.dirname(sys.argv[0]), '.pexrc')]:
       try:
         with open(os.path.expanduser(filename)) as fh:
           rc_items = map(self._get_kv, fh)
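
The change above prepends /etc/pexrc to the rc search list; since _from_rc folds the files into one dict in order, later files (the per-user rc and a .pexrc beside the entry point) override machine-wide settings. A minimal sketch of that precedence logic — not pex's actual parser, whose key/value splitting lives in _get_kv:

  import os

  def read_rc_sketch(filenames):
    """Read KEY=VALUE rc files in order; later files override earlier
    ones and missing files are skipped silently."""
    rc_values = {}
    for filename in filenames:
      try:
        with open(os.path.expanduser(filename)) as fh:
          for line in fh:
            line = line.strip()
            if line and not line.startswith('#') and '=' in line:
              key, _, value = line.partition('=')
              rc_values[key.strip()] = value.strip()
      except (IOError, OSError):
        continue
    return rc_values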
diff --git a/pex/version.py b/pex/version.py
index 395b068..51862fa 100644
--- a/pex/version.py
+++ b/pex/version.py
@@ -1,7 +1,7 @@
 # Copyright 2015 Pants project contributors (see CONTRIBUTORS.md).
 # Licensed under the Apache License, Version 2.0 (see LICENSE).
 
-__version__ = '1.1.0'
+__version__ = '1.1.2'
 
-SETUPTOOLS_REQUIREMENT = 'setuptools>=2.2,<16'
-WHEEL_REQUIREMENT = 'wheel>=0.24.0,<0.25.0'
+SETUPTOOLS_REQUIREMENT = 'setuptools>=2.2,<20'
+WHEEL_REQUIREMENT = 'wheel>=0.24.0,<0.27.0'
diff --git a/scripts/coverage.sh b/scripts/coverage.sh
index 095b7a5..9a58e11 100755
--- a/scripts/coverage.sh
+++ b/scripts/coverage.sh
@@ -4,4 +4,4 @@ coverage run -p -m py.test tests
 coverage run -p -m pex.bin.pex -v --help >&/dev/null
 coverage run -p -m pex.bin.pex -v -- scripts/do_nothing.py
 coverage run -p -m pex.bin.pex -v requests -- scripts/do_nothing.py
-coverage run -p -m pex.bin.pex -v . 'setuptools>=2.2,<16' -- scripts/do_nothing.py
+coverage run -p -m pex.bin.pex -v . 'setuptools>=2.2,<20' -- scripts/do_nothing.py
diff --git a/tests/test_crawler.py b/tests/test_crawler.py
index 470c429..3fc44eb 100644
--- a/tests/test_crawler.py
+++ b/tests/test_crawler.py
@@ -6,8 +6,14 @@ import os
 from twitter.common.contextutil import temporary_dir
 
 from pex.crawler import Crawler, PageParser
+from pex.http import Context
 from pex.link import Link
 
+try:
+  from unittest import mock
+except ImportError:
+  import mock
+
 
 def lpp(page):
   links = PageParser.links(page)
@@ -40,6 +46,12 @@ def test_page_parser_basic():
       assert lpp("<a href='stuff'> <a href=%s>" % target) == (['stuff', href], [])
 
 
+def test_page_parser_escaped_html():
+  url = 'url?param1=val&param2=val2'
+  link = '<a href="%s">' % url.replace('&', '&amp;')
+  assert lpp(link) == ([url], [])
+
+
 def test_page_parser_rels():
   VALID_RELS = tuple(PageParser.REL_TYPES)
   for rel in VALID_RELS + ('', ' ', 'blah'):
@@ -100,6 +112,56 @@ def test_crawler_unknown_scheme():
   Crawler().crawl('ftp://ftp.cdrom.com') == (set(), set())
 
 
-# TODO(wickman)
-#   test remote http crawling via mock
-#   test page decoding via mock
+MOCK_INDEX_TMPL = '''
+<h1>Index of /home/third_party/python</h1>
+<table>
+<tr>
+  <td valign="top"><img src="/icons/back.gif" alt="[DIR]"></td>
+  <td>&nbsp;</td>
+  <td align="right">  - </td>
+  <td>&nbsp;</td>
+</tr>
+%s
+</table>
+'''
+
+MOCK_INDEX_A = MOCK_INDEX_TMPL % '''
+<tr>
+  <td valign="top"><img src="/icons/compressed.gif" alt="[   ]"></td>
+  <td><a href="3to2-1.0.tar.gz">3to2-1.0.tar.gz</a></td>
+  <td align="right">16-Apr-2015 23:18  </td>
+  <td align="right"> 45K</td>
+  <td>GZIP compressed docume></td>
+</tr>
+'''
+
+MOCK_INDEX_B = MOCK_INDEX_TMPL % '''
+<tr>
+  <td valign="top"><img src="/icons/compressed.gif" alt="[   ]"></td>
+  <td>
+    <a href="APScheduler-2.1.0.tar.gz">APScheduler-2.1.0.tar.gz</a>
+  </td>
+  <td align="right">16-Apr-2015 23:18  </td>
+  <td align="right"> 41K</td>
+  <td>GZIP compressed docume></td>
+</tr>
+'''
+
+
+def test_crawler_remote():
+  Crawler.reset_cache()
+
+  mock_context = mock.create_autospec(Context, spec_set=True)
+  mock_context.content.side_effect = [MOCK_INDEX_A, MOCK_INDEX_B, Exception('shouldnt get here')]
+  expected_output = set([Link('http://url1.test.com/3to2-1.0.tar.gz'),
+                         Link('http://url2.test.com/APScheduler-2.1.0.tar.gz')])
+
+  c = Crawler(mock_context)
+  test_links = [Link('http://url1.test.com'), Link('http://url2.test.com')]
+  assert c.crawl(test_links) == expected_output
+
+  # Test memoization of Crawler.crawl().
+  assert c.crawl(test_links) == expected_output
+
+
+# TODO(wickman): test page decoding via mock
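
Two mock features carry the new remote-crawl test above: create_autospec constrains the mock to Context's real attribute set (spec_set=True makes any other attribute an error), and side_effect serves one canned response per call, raising once the list is exhausted. A small self-contained illustration against a hypothetical stand-in class (FakeContext is not pex's Context):

  try:
    from unittest import mock  # Python 3.3+
  except ImportError:
    import mock  # the 'mock' backport on Python 2

  class FakeContext(object):  # hypothetical stand-in for pex.http.Context
    def content(self, link):
      raise NotImplementedError

  ctx = mock.create_autospec(FakeContext, spec_set=True)
  ctx.content.side_effect = ['page-one', 'page-two']

  assert ctx.content('http://a.test') == 'page-one'  # first canned response
  assert ctx.content('http://b.test') == 'page-two'  # second canned response
  # A third call would raise StopIteration: the side_effect list is exhausted.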
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 4ec03ab..f53b8f5 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -2,8 +2,9 @@
 # Licensed under the Apache License, Version 2.0 (see LICENSE).
 
 import os
+import sys
 
-from twitter.common.contextutil import temporary_file
+from twitter.common.contextutil import environment_as, temporary_dir, temporary_file
 
 from pex.testing import run_simple_pex_test
 
@@ -30,3 +31,17 @@ def test_pex_interpreter():
     so, rc = run_simple_pex_test("", args=(fp.name,), coverage=True, env=env)
     assert so == b'Hello world\n'
     assert rc == 0
+
+
+def test_pex_python_symlink():
+  with temporary_dir() as td:
+    with environment_as(HOME=td):
+      symlink_path = os.path.join(td, 'python-symlink')
+      os.symlink(sys.executable, symlink_path)
+      pexrc_path = os.path.join(td, '.pexrc')
+      with open(pexrc_path, 'w') as pexrc:
+        pexrc.write("PEX_PYTHON=%s" % symlink_path)
+
+      body = "print('Hello')"
+      _, rc = run_simple_pex_test(body, coverage=True)
+      assert rc == 0
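
The symlink test leans on twitter.common.contextutil.environment_as to point HOME at the temp dir, so that pex's '~/.pexrc' lookup expands into the directory holding the test's .pexrc. Assuming only its evident contract (override on entry, restore on exit), a minimal equivalent sketch:

  import contextlib
  import os

  @contextlib.contextmanager
  def environment_as_sketch(**kwargs):
    """Temporarily override environment variables, restoring originals
    (including deleting ones this sketch introduced) on exit."""
    saved = dict((key, os.environ.get(key)) for key in kwargs)
    os.environ.update(kwargs)
    try:
      yield
    finally:
      for key, value in saved.items():
        if value is None:
          os.environ.pop(key, None)
        else:
          os.environ[key] = value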

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-pex.git