[Python-modules-commits] [python-cachecontrol] 01/03: Import python-cachecontrol_0.11.7.orig.tar.gz

Barry Warsaw barry at moszumanska.debian.org
Mon Oct 31 20:36:06 UTC 2016


This is an automated email from the git hooks/post-receive script.

barry pushed a commit to branch master
in repository python-cachecontrol.

commit a35f01178dfeb5c41724f4c2dff587f20c1d593b
Author: Barry Warsaw <barry at python.org>
Date:   Mon Oct 31 16:28:24 2016 -0400

    Import python-cachecontrol_0.11.7.orig.tar.gz
---
 CacheControl.egg-info/PKG-INFO         |  6 +--
 CacheControl.egg-info/SOURCES.txt      |  2 +
 CacheControl.egg-info/entry_points.txt |  3 ++
 PKG-INFO                               |  6 +--
 README.rst                             |  4 +-
 cachecontrol/__init__.py               |  2 +-
 cachecontrol/_cmd.py                   | 60 ++++++++++++++++++++++
 cachecontrol/adapter.py                | 18 +++++--
 cachecontrol/compat.py                 |  6 +++
 cachecontrol/controller.py             | 94 ++++++++++++++++++++++++++--------
 cachecontrol/filewrapper.py            | 33 ++++++++----
 cachecontrol/heuristics.py             | 12 +++--
 cachecontrol/serialize.py              | 18 +++++--
 setup.cfg                              |  2 +-
 setup.py                               |  7 ++-
 15 files changed, 221 insertions(+), 52 deletions(-)

diff --git a/CacheControl.egg-info/PKG-INFO b/CacheControl.egg-info/PKG-INFO
index 73e3419..aadf0b8 100644
--- a/CacheControl.egg-info/PKG-INFO
+++ b/CacheControl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: CacheControl
-Version: 0.11.5
+Version: 0.11.7
 Summary: httplib2 caching for requests
 Home-page: https://github.com/ionrock/cachecontrol
 Author: Eric Larson
@@ -10,8 +10,8 @@ Description: ==============
          CacheControl
         ==============
         
-        .. image:: https://pypip.in/version/cachecontrol/badge.svg
-            :target: https://pypi.python.org/pypi/cachecontrol/
+        .. image:: https://img.shields.io/pypi/v/cachecontrol.svg
+            :target: https://pypi.python.org/pypi/cachecontrol
             :alt: Latest Version
         
         .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
diff --git a/CacheControl.egg-info/SOURCES.txt b/CacheControl.egg-info/SOURCES.txt
index 04714c4..f89ee39 100644
--- a/CacheControl.egg-info/SOURCES.txt
+++ b/CacheControl.egg-info/SOURCES.txt
@@ -6,9 +6,11 @@ setup.py
 CacheControl.egg-info/PKG-INFO
 CacheControl.egg-info/SOURCES.txt
 CacheControl.egg-info/dependency_links.txt
+CacheControl.egg-info/entry_points.txt
 CacheControl.egg-info/requires.txt
 CacheControl.egg-info/top_level.txt
 cachecontrol/__init__.py
+cachecontrol/_cmd.py
 cachecontrol/adapter.py
 cachecontrol/cache.py
 cachecontrol/compat.py
diff --git a/CacheControl.egg-info/entry_points.txt b/CacheControl.egg-info/entry_points.txt
new file mode 100644
index 0000000..7c31574
--- /dev/null
+++ b/CacheControl.egg-info/entry_points.txt
@@ -0,0 +1,3 @@
+[console_scripts]
+doesitcache = cachecontrol._cmd:main
+
diff --git a/PKG-INFO b/PKG-INFO
index 73e3419..aadf0b8 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: CacheControl
-Version: 0.11.5
+Version: 0.11.7
 Summary: httplib2 caching for requests
 Home-page: https://github.com/ionrock/cachecontrol
 Author: Eric Larson
@@ -10,8 +10,8 @@ Description: ==============
          CacheControl
         ==============
         
-        .. image:: https://pypip.in/version/cachecontrol/badge.svg
-            :target: https://pypi.python.org/pypi/cachecontrol/
+        .. image:: https://img.shields.io/pypi/v/cachecontrol.svg
+            :target: https://pypi.python.org/pypi/cachecontrol
             :alt: Latest Version
         
         .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
diff --git a/README.rst b/README.rst
index 1e790a0..7063ebf 100644
--- a/README.rst
+++ b/README.rst
@@ -2,8 +2,8 @@
  CacheControl
 ==============
 
-.. image:: https://pypip.in/version/cachecontrol/badge.svg
-    :target: https://pypi.python.org/pypi/cachecontrol/
+.. image:: https://img.shields.io/pypi/v/cachecontrol.svg
+    :target: https://pypi.python.org/pypi/cachecontrol
     :alt: Latest Version
 
 .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
diff --git a/cachecontrol/__init__.py b/cachecontrol/__init__.py
index d6af9b9..ec9da2e 100644
--- a/cachecontrol/__init__.py
+++ b/cachecontrol/__init__.py
@@ -4,7 +4,7 @@ Make it easy to import from cachecontrol without long namespaces.
 """
 __author__ = 'Eric Larson'
 __email__ = 'eric at ionrock.org'
-__version__ = '0.11.5'
+__version__ = '0.11.7'
 
 from .wrapper import CacheControl
 from .adapter import CacheControlAdapter
diff --git a/cachecontrol/_cmd.py b/cachecontrol/_cmd.py
new file mode 100644
index 0000000..fcb785d
--- /dev/null
+++ b/cachecontrol/_cmd.py
@@ -0,0 +1,60 @@
+import logging
+
+import requests
+
+from cachecontrol.adapter import CacheControlAdapter
+from cachecontrol.cache import DictCache
+from cachecontrol.controller import logger
+
+from argparse import ArgumentParser
+
+
+def setup_logging():
+    logger.setLevel(logging.DEBUG)
+    handler = logging.StreamHandler()
+    logger.addHandler(handler)
+
+
+def get_session():
+    adapter = CacheControlAdapter(
+        DictCache(),
+        cache_etags=True,
+        serializer=None,
+        heuristic=None,
+    )
+    sess = requests.Session()
+    sess.mount('http://', adapter)
+    sess.mount('https://', adapter)
+
+    sess.cache_controller = adapter.controller
+    return sess
+
+
+def get_args():
+    parser = ArgumentParser()
+    parser.add_argument('url', help='The URL to try and cache')
+    return parser.parse_args()
+
+
+def main(args=None):
+    args = get_args()
+    sess = get_session()
+
+    # Make a request to get a response
+    resp = sess.get(args.url)
+
+    # Turn on logging
+    setup_logging()
+
+    # try setting the cache
+    sess.cache_controller.cache_response(resp.request, resp.raw)
+
+    # Now try to get it
+    if sess.cache_controller.cached_request(resp.request):
+        print('Cached!')
+    else:
+        print('Not cached :(')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/cachecontrol/adapter.py b/cachecontrol/adapter.py
index 54f1b51..270c8b2 100644
--- a/cachecontrol/adapter.py
+++ b/cachecontrol/adapter.py
@@ -1,3 +1,4 @@
+import types
 import functools
 
 from requests.adapters import HTTPAdapter
@@ -55,6 +56,10 @@ class CacheControlAdapter(HTTPAdapter):
         cached response
         """
         if not from_cache and request.method == 'GET':
+            # Check for any heuristics that might update headers
+            # before trying to cache.
+            if self.heuristic:
+                response = self.heuristic.apply(response)
 
             # apply any expiration heuristics
             if response.status == 304:
@@ -82,11 +87,6 @@ class CacheControlAdapter(HTTPAdapter):
             elif response.status == 301:
                 self.controller.cache_response(request, response)
             else:
-                # Check for any heuristics that might update headers
-                # before trying to cache.
-                if self.heuristic:
-                    response = self.heuristic.apply(response)
-
                 # Wrap the response file with a wrapper that will cache the
                 #   response when the stream has been consumed.
                 response._fp = CallbackFileWrapper(
@@ -97,6 +97,14 @@ class CacheControlAdapter(HTTPAdapter):
                         response,
                     )
                 )
+                if response.chunked:
+                    super_update_chunk_length = response._update_chunk_length
+
+                    def _update_chunk_length(self):
+                        super_update_chunk_length()
+                        if self.chunk_left == 0:
+                            self._fp._close()
+                    response._update_chunk_length = types.MethodType(_update_chunk_length, response)
 
         resp = super(CacheControlAdapter, self).build_response(
             request, response
diff --git a/cachecontrol/compat.py b/cachecontrol/compat.py
index 489eb86..ce55657 100644
--- a/cachecontrol/compat.py
+++ b/cachecontrol/compat.py
@@ -21,3 +21,9 @@ try:
     from requests.packages.urllib3.util import is_fp_closed
 except ImportError:
     from urllib3.util import is_fp_closed
+
+# Replicate some six behaviour
+try:
+    text_type = (unicode,)
+except NameError:
+    text_type = (str,)
diff --git a/cachecontrol/controller.py b/cachecontrol/controller.py
index f038074..f93f083 100644
--- a/cachecontrol/controller.py
+++ b/cachecontrol/controller.py
@@ -1,6 +1,7 @@
 """
 The httplib2 algorithms ported for use with requests.
 """
+import logging
 import re
 import calendar
 import time
@@ -12,6 +13,8 @@ from .cache import DictCache
 from .serialize import Serializer
 
 
+logger = logging.getLogger(__name__)
+
 URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
 
 
@@ -86,23 +89,28 @@ class CacheController(object):
         return False.
         """
         cache_url = self.cache_url(request.url)
+        logger.debug('Looking up "%s" in the cache', cache_url)
         cc = self.parse_cache_control(request.headers)
 
-        # non-caching states
-        no_cache = True if 'no-cache' in cc else False
-        if 'max-age' in cc and cc['max-age'] == 0:
-            no_cache = True
+        # Bail out if the request insists on fresh data
+        if 'no-cache' in cc:
+            logger.debug('Request header has "no-cache", cache bypassed')
+            return False
 
-        # Bail out if no-cache was set
-        if no_cache:
+        if 'max-age' in cc and cc['max-age'] == 0:
+            logger.debug('Request header has "max_age" as 0, cache bypassed')
             return False
 
-        # It is in the cache, so lets see if it is going to be
-        # fresh enough
-        resp = self.serializer.loads(request, self.cache.get(cache_url))
+        # Request allows serving from the cache, let's see if we find something
+        cache_data = self.cache.get(cache_url)
+        if cache_data is None:
+            logger.debug('No cache entry available')
+            return False
 
-        # Check to see if we have a cached object
+        # Check whether it can be deserialized
+        resp = self.serializer.loads(request, cache_data)
         if not resp:
+            logger.warning('Cache entry deserialization failed, entry ignored')
             return False
 
         # If we have a cached 301, return it immediately. We don't
@@ -114,14 +122,19 @@ class CacheController(object):
         # Client can try to refresh the value by repeating the request
         # with cache busting headers as usual (ie no-cache).
         if resp.status == 301:
+            msg = ('Returning cached "301 Moved Permanently" response '
+                   '(ignoring date and etag information)')
+            logger.debug(msg)
             return resp
 
         headers = CaseInsensitiveDict(resp.headers)
         if not headers or 'date' not in headers:
-            # With date or etag, the cached response can never be used
-            # and should be deleted.
             if 'etag' not in headers:
+                # Without date or etag, the cached response can never be used
+                # and should be deleted.
+                logger.debug('Purging cached response: no date or etag')
                 self.cache.delete(cache_url)
+            logger.debug('Ignoring cached response: no date')
             return False
 
         now = time.time()
@@ -129,6 +142,7 @@ class CacheController(object):
             parsedate_tz(headers['date'])
         )
         current_age = max(0, now - date)
+        logger.debug('Current age based on date: %i', current_age)
 
         # TODO: There is an assumption that the result will be a
         #       urllib3 response object. This may not be best since we
@@ -142,6 +156,8 @@ class CacheController(object):
         # Check the max-age pragma in the cache control header
         if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
             freshness_lifetime = int(resp_cc['max-age'])
+            logger.debug('Freshness lifetime from max-age: %i',
+                         freshness_lifetime)
 
         # If there isn't a max-age, check for an expires header
         elif 'expires' in headers:
@@ -149,11 +165,16 @@ class CacheController(object):
             if expires is not None:
                 expire_time = calendar.timegm(expires) - date
                 freshness_lifetime = max(0, expire_time)
+                logger.debug("Freshness lifetime from expires: %i",
+                             freshness_lifetime)
 
-        # determine if we are setting freshness limit in the req
+        # Determine if we are setting freshness limit in the
+        # request. Note, this overrides what was in the response.
         if 'max-age' in cc:
             try:
                 freshness_lifetime = int(cc['max-age'])
+                logger.debug('Freshness lifetime from request max-age: %i',
+                             freshness_lifetime)
             except ValueError:
                 freshness_lifetime = 0
 
@@ -164,15 +185,20 @@ class CacheController(object):
                 min_fresh = 0
             # adjust our current age by our min fresh
             current_age += min_fresh
+            logger.debug('Adjusted current age from min-fresh: %i',
+                         current_age)
 
-        # see how fresh we actually are
-        fresh = (freshness_lifetime > current_age)
-
-        if fresh:
+        # Return entry if it is fresh enough
+        if freshness_lifetime > current_age:
+            logger.debug('The response is "fresh", returning cached response')
+            logger.debug('%i > %i', freshness_lifetime, current_age)
             return resp
 
         # we're not fresh. If we don't have an Etag, clear it out
         if 'etag' not in headers:
+            logger.debug(
+                'The cached response is "stale" with no etag, purging'
+            )
             self.cache.delete(cache_url)
 
         # return the original handler
@@ -202,23 +228,48 @@ class CacheController(object):
         """
         # From httplib2: Don't cache 206's since we aren't going to
         #                handle byte range requests
-        if response.status not in [200, 203, 300, 301]:
+        cacheable_status_codes = [200, 203, 300, 301]
+        if response.status not in cacheable_status_codes:
+            logger.debug(
+                'Status code %s not in %s',
+                response.status,
+                cacheable_status_codes
+            )
             return
 
         response_headers = CaseInsensitiveDict(response.headers)
 
+        # If we've been given a body, our response has a Content-Length, that
+        # Content-Length is valid then we can check to see if the body we've
+        # been given matches the expected size, and if it doesn't we'll just
+        # skip trying to cache it.
+        if (body is not None and
+                "content-length" in response_headers and
+                response_headers["content-length"].isdigit() and
+                int(response_headers["content-length"]) != len(body)):
+            return
+
         cc_req = self.parse_cache_control(request.headers)
         cc = self.parse_cache_control(response_headers)
 
         cache_url = self.cache_url(request.url)
+        logger.debug('Updating cache with response from "%s"', cache_url)
 
         # Delete it from the cache if we happen to have it stored there
-        no_store = cc.get('no-store') or cc_req.get('no-store')
+        no_store = False
+        if cc.get('no-store'):
+            no_store = True
+            logger.debug('Response header has "no-store"')
+        if cc_req.get('no-store'):
+            no_store = True
+            logger.debug('Request header has "no-store"')
         if no_store and self.cache.get(cache_url):
+            logger.debug('Purging existing cache entry to honor "no-store"')
             self.cache.delete(cache_url)
 
         # If we've been given an etag, then keep the response
         if self.cache_etags and 'etag' in response_headers:
+            logger.debug('Caching due to etag')
             self.cache.set(
                 cache_url,
                 self.serializer.dumps(request, response, body=body),
@@ -227,6 +278,7 @@ class CacheController(object):
         # Add to the cache any 301s. We do this before looking that
         # the Date headers.
         elif response.status == 301:
+            logger.debug('Caching permanant redirect')
             self.cache.set(
                 cache_url,
                 self.serializer.dumps(request, response)
@@ -238,7 +290,8 @@ class CacheController(object):
         elif 'date' in response_headers:
             # cache when there is a max-age > 0
             if cc and cc.get('max-age'):
-                if int(cc['max-age']) > 0:
+                if cc['max-age'].isdigit() and int(cc['max-age']) > 0:
+                    logger.debug('Caching b/c date exists and max-age > 0')
                     self.cache.set(
                         cache_url,
                         self.serializer.dumps(request, response, body=body),
@@ -248,6 +301,7 @@ class CacheController(object):
             # in the meantime.
             elif 'expires' in response_headers:
                 if response_headers['expires']:
+                    logger.debug('Caching b/c of expires header')
                     self.cache.set(
                         cache_url,
                         self.serializer.dumps(request, response, body=body),
diff --git a/cachecontrol/filewrapper.py b/cachecontrol/filewrapper.py
index 4b91bce..f1e1ce0 100644
--- a/cachecontrol/filewrapper.py
+++ b/cachecontrol/filewrapper.py
@@ -45,19 +45,34 @@ class CallbackFileWrapper(object):
         # TODO: Add some logging here...
         return False
 
+    def _close(self):
+        if self.__callback:
+            self.__callback(self.__buf.getvalue())
+
+        # We assign this to None here, because otherwise we can get into
+        # really tricky problems where the CPython interpreter dead locks
+        # because the callback is holding a reference to something which
+        # has a __del__ method. Setting this to None breaks the cycle
+        # and allows the garbage collector to do its thing normally.
+        self.__callback = None
+
     def read(self, amt=None):
         data = self.__fp.read(amt)
         self.__buf.write(data)
+        if self.__is_fp_closed():
+            self._close()
 
+        return data
+
+    def _safe_read(self, amt):
+        data = self.__fp._safe_read(amt)
+        if amt == 2 and data == b'\r\n':
+            # urllib executes this read to toss the CRLF at the end
+            # of the chunk.
+            return data
+
+        self.__buf.write(data)
         if self.__is_fp_closed():
-            if self.__callback:
-                self.__callback(self.__buf.getvalue())
-
-            # We assign this to None here, because otherwise we can get into
-            # really tricky problems where the CPython interpreter dead locks
-            # because the callback is holding a reference to something which
-            # has a __del__ method. Setting this to None breaks the cycle
-            # and allows the garbage collector to do it's thing normally.
-            self.__callback = None
+            self._close()
 
         return data
diff --git a/cachecontrol/heuristics.py b/cachecontrol/heuristics.py
index 01b6314..94715a4 100644
--- a/cachecontrol/heuristics.py
+++ b/cachecontrol/heuristics.py
@@ -40,10 +40,14 @@ class BaseHeuristic(object):
         return {}
 
     def apply(self, response):
-        warning_header_value = self.warning(response)
-        response.headers.update(self.update_headers(response))
-        if warning_header_value is not None:
-            response.headers.update({'Warning': warning_header_value})
+        updated_headers = self.update_headers(response)
+
+        if updated_headers:
+            response.headers.update(updated_headers)
+            warning_header_value = self.warning(response)
+            if warning_header_value is not None:
+                response.headers.update({'Warning': warning_header_value})
+
         return response
 
 
diff --git a/cachecontrol/serialize.py b/cachecontrol/serialize.py
index 6b17d80..eb917d7 100644
--- a/cachecontrol/serialize.py
+++ b/cachecontrol/serialize.py
@@ -5,7 +5,7 @@ import zlib
 
 from requests.structures import CaseInsensitiveDict
 
-from .compat import HTTPResponse, pickle
+from .compat import HTTPResponse, pickle, text_type
 
 
 def _b64_encode_bytes(b):
@@ -16,6 +16,12 @@ def _b64_encode_str(s):
     return _b64_encode_bytes(s.encode("utf8"))
 
 
+def _b64_encode(s):
+    if isinstance(s, text_type):
+        return _b64_encode_str(s)
+    return _b64_encode_bytes(s)
+
+
 def _b64_decode_bytes(b):
     return base64.b64decode(b.encode("ascii"))
 
@@ -48,7 +54,7 @@ class Serializer(object):
             "response": {
                 "body": _b64_encode_bytes(body),
                 "headers": dict(
-                    (_b64_encode_str(k), _b64_encode_str(v))
+                    (_b64_encode(k), _b64_encode(v))
                     for k, v in response.headers.items()
                 ),
                 "status": response.status,
@@ -69,7 +75,7 @@ class Serializer(object):
 
         # Encode our Vary headers to ensure they can be serialized as JSON
         data["vary"] = dict(
-            (_b64_encode_str(k), _b64_encode_str(v) if v is not None else v)
+            (_b64_encode(k), _b64_encode(v) if v is not None else v)
             for k, v in data["vary"].items()
         )
 
@@ -128,6 +134,12 @@ class Serializer(object):
 
         body_raw = cached["response"].pop("body")
 
+        headers = CaseInsensitiveDict(data=cached['response']['headers'])
+        if headers.get('transfer-encoding', '') == 'chunked':
+            headers.pop('transfer-encoding')
+
+        cached['response']['headers'] = headers
+
         try:
             body = io.BytesIO(body_raw)
         except TypeError:
diff --git a/setup.cfg b/setup.cfg
index d690f21..73e0b9f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,4 +1,4 @@
-[pytest]
+[tool:pytest]
 norecursedirs = bin lib include build
 
 [egg_info]
diff --git a/setup.py b/setup.py
index e04a213..e8d255b 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ import setuptools
 
 long_description = open('README.rst').read()
 
-VERSION = '0.11.5'
+VERSION = '0.11.7'
 
 setup_params = dict(
     name='CacheControl',
@@ -23,6 +23,11 @@ setup_params = dict(
     extras_require={
         'filecache': ['lockfile>=0.9'],
     },
+    entry_points={
+        'console_scripts': [
+            'doesitcache = cachecontrol._cmd:main',
+        ]
+    },
     classifiers=[
         'Development Status :: 4 - Beta',
         'Environment :: Web Environment',

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-cachecontrol.git



More information about the Python-modules-commits mailing list