[Python-modules-commits] [python-cachecontrol] 01/03: Import python-cachecontrol_0.11.7.orig.tar.gz
Barry Warsaw
barry at moszumanska.debian.org
Mon Oct 31 20:36:06 UTC 2016
This is an automated email from the git hooks/post-receive script.
barry pushed a commit to branch master
in repository python-cachecontrol.
commit a35f01178dfeb5c41724f4c2dff587f20c1d593b
Author: Barry Warsaw <barry at python.org>
Date: Mon Oct 31 16:28:24 2016 -0400
Import python-cachecontrol_0.11.7.orig.tar.gz
---
CacheControl.egg-info/PKG-INFO | 6 +--
CacheControl.egg-info/SOURCES.txt | 2 +
CacheControl.egg-info/entry_points.txt | 3 ++
PKG-INFO | 6 +--
README.rst | 4 +-
cachecontrol/__init__.py | 2 +-
cachecontrol/_cmd.py | 60 ++++++++++++++++++++++
cachecontrol/adapter.py | 18 +++++--
cachecontrol/compat.py | 6 +++
cachecontrol/controller.py | 94 ++++++++++++++++++++++++++--------
cachecontrol/filewrapper.py | 33 ++++++++----
cachecontrol/heuristics.py | 12 +++--
cachecontrol/serialize.py | 18 +++++--
setup.cfg | 2 +-
setup.py | 7 ++-
15 files changed, 221 insertions(+), 52 deletions(-)
diff --git a/CacheControl.egg-info/PKG-INFO b/CacheControl.egg-info/PKG-INFO
index 73e3419..aadf0b8 100644
--- a/CacheControl.egg-info/PKG-INFO
+++ b/CacheControl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: CacheControl
-Version: 0.11.5
+Version: 0.11.7
Summary: httplib2 caching for requests
Home-page: https://github.com/ionrock/cachecontrol
Author: Eric Larson
@@ -10,8 +10,8 @@ Description: ==============
CacheControl
==============
- .. image:: https://pypip.in/version/cachecontrol/badge.svg
- :target: https://pypi.python.org/pypi/cachecontrol/
+ .. image:: https://img.shields.io/pypi/v/cachecontrol.svg
+ :target: https://pypi.python.org/pypi/cachecontrol
:alt: Latest Version
.. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
diff --git a/CacheControl.egg-info/SOURCES.txt b/CacheControl.egg-info/SOURCES.txt
index 04714c4..f89ee39 100644
--- a/CacheControl.egg-info/SOURCES.txt
+++ b/CacheControl.egg-info/SOURCES.txt
@@ -6,9 +6,11 @@ setup.py
CacheControl.egg-info/PKG-INFO
CacheControl.egg-info/SOURCES.txt
CacheControl.egg-info/dependency_links.txt
+CacheControl.egg-info/entry_points.txt
CacheControl.egg-info/requires.txt
CacheControl.egg-info/top_level.txt
cachecontrol/__init__.py
+cachecontrol/_cmd.py
cachecontrol/adapter.py
cachecontrol/cache.py
cachecontrol/compat.py
diff --git a/CacheControl.egg-info/entry_points.txt b/CacheControl.egg-info/entry_points.txt
new file mode 100644
index 0000000..7c31574
--- /dev/null
+++ b/CacheControl.egg-info/entry_points.txt
@@ -0,0 +1,3 @@
+[console_scripts]
+doesitcache = cachecontrol._cmd:main
+
diff --git a/PKG-INFO b/PKG-INFO
index 73e3419..aadf0b8 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: CacheControl
-Version: 0.11.5
+Version: 0.11.7
Summary: httplib2 caching for requests
Home-page: https://github.com/ionrock/cachecontrol
Author: Eric Larson
@@ -10,8 +10,8 @@ Description: ==============
CacheControl
==============
- .. image:: https://pypip.in/version/cachecontrol/badge.svg
- :target: https://pypi.python.org/pypi/cachecontrol/
+ .. image:: https://img.shields.io/pypi/v/cachecontrol.svg
+ :target: https://pypi.python.org/pypi/cachecontrol
:alt: Latest Version
.. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
diff --git a/README.rst b/README.rst
index 1e790a0..7063ebf 100644
--- a/README.rst
+++ b/README.rst
@@ -2,8 +2,8 @@
CacheControl
==============
-.. image:: https://pypip.in/version/cachecontrol/badge.svg
- :target: https://pypi.python.org/pypi/cachecontrol/
+.. image:: https://img.shields.io/pypi/v/cachecontrol.svg
+ :target: https://pypi.python.org/pypi/cachecontrol
:alt: Latest Version
.. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
diff --git a/cachecontrol/__init__.py b/cachecontrol/__init__.py
index d6af9b9..ec9da2e 100644
--- a/cachecontrol/__init__.py
+++ b/cachecontrol/__init__.py
@@ -4,7 +4,7 @@ Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = 'Eric Larson'
__email__ = 'eric at ionrock.org'
-__version__ = '0.11.5'
+__version__ = '0.11.7'
from .wrapper import CacheControl
from .adapter import CacheControlAdapter
diff --git a/cachecontrol/_cmd.py b/cachecontrol/_cmd.py
new file mode 100644
index 0000000..fcb785d
--- /dev/null
+++ b/cachecontrol/_cmd.py
@@ -0,0 +1,60 @@
+import logging
+
+import requests
+
+from cachecontrol.adapter import CacheControlAdapter
+from cachecontrol.cache import DictCache
+from cachecontrol.controller import logger
+
+from argparse import ArgumentParser
+
+
+def setup_logging():
+ logger.setLevel(logging.DEBUG)
+ handler = logging.StreamHandler()
+ logger.addHandler(handler)
+
+
+def get_session():
+ adapter = CacheControlAdapter(
+ DictCache(),
+ cache_etags=True,
+ serializer=None,
+ heuristic=None,
+ )
+ sess = requests.Session()
+ sess.mount('http://', adapter)
+ sess.mount('https://', adapter)
+
+ sess.cache_controller = adapter.controller
+ return sess
+
+
+def get_args():
+ parser = ArgumentParser()
+ parser.add_argument('url', help='The URL to try and cache')
+ return parser.parse_args()
+
+
+def main(args=None):
+ args = get_args()
+ sess = get_session()
+
+ # Make a request to get a response
+ resp = sess.get(args.url)
+
+ # Turn on logging
+ setup_logging()
+
+ # try setting the cache
+ sess.cache_controller.cache_response(resp.request, resp.raw)
+
+ # Now try to get it
+ if sess.cache_controller.cached_request(resp.request):
+ print('Cached!')
+ else:
+ print('Not cached :(')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/cachecontrol/adapter.py b/cachecontrol/adapter.py
index 54f1b51..270c8b2 100644
--- a/cachecontrol/adapter.py
+++ b/cachecontrol/adapter.py
@@ -1,3 +1,4 @@
+import types
import functools
from requests.adapters import HTTPAdapter
@@ -55,6 +56,10 @@ class CacheControlAdapter(HTTPAdapter):
cached response
"""
if not from_cache and request.method == 'GET':
+ # Check for any heuristics that might update headers
+ # before trying to cache.
+ if self.heuristic:
+ response = self.heuristic.apply(response)
# apply any expiration heuristics
if response.status == 304:
@@ -82,11 +87,6 @@ class CacheControlAdapter(HTTPAdapter):
elif response.status == 301:
self.controller.cache_response(request, response)
else:
- # Check for any heuristics that might update headers
- # before trying to cache.
- if self.heuristic:
- response = self.heuristic.apply(response)
-
# Wrap the response file with a wrapper that will cache the
# response when the stream has been consumed.
response._fp = CallbackFileWrapper(
@@ -97,6 +97,14 @@ class CacheControlAdapter(HTTPAdapter):
response,
)
)
+ if response.chunked:
+ super_update_chunk_length = response._update_chunk_length
+
+ def _update_chunk_length(self):
+ super_update_chunk_length()
+ if self.chunk_left == 0:
+ self._fp._close()
+ response._update_chunk_length = types.MethodType(_update_chunk_length, response)
resp = super(CacheControlAdapter, self).build_response(
request, response
diff --git a/cachecontrol/compat.py b/cachecontrol/compat.py
index 489eb86..ce55657 100644
--- a/cachecontrol/compat.py
+++ b/cachecontrol/compat.py
@@ -21,3 +21,9 @@ try:
from requests.packages.urllib3.util import is_fp_closed
except ImportError:
from urllib3.util import is_fp_closed
+
+# Replicate some six behaviour
+try:
+ text_type = (unicode,)
+except NameError:
+ text_type = (str,)
diff --git a/cachecontrol/controller.py b/cachecontrol/controller.py
index f038074..f93f083 100644
--- a/cachecontrol/controller.py
+++ b/cachecontrol/controller.py
@@ -1,6 +1,7 @@
"""
The httplib2 algorithms ported for use with requests.
"""
+import logging
import re
import calendar
import time
@@ -12,6 +13,8 @@ from .cache import DictCache
from .serialize import Serializer
+logger = logging.getLogger(__name__)
+
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
@@ -86,23 +89,28 @@ class CacheController(object):
return False.
"""
cache_url = self.cache_url(request.url)
+ logger.debug('Looking up "%s" in the cache', cache_url)
cc = self.parse_cache_control(request.headers)
- # non-caching states
- no_cache = True if 'no-cache' in cc else False
- if 'max-age' in cc and cc['max-age'] == 0:
- no_cache = True
+ # Bail out if the request insists on fresh data
+ if 'no-cache' in cc:
+ logger.debug('Request header has "no-cache", cache bypassed')
+ return False
- # Bail out if no-cache was set
- if no_cache:
+ if 'max-age' in cc and cc['max-age'] == 0:
+ logger.debug('Request header has "max_age" as 0, cache bypassed')
return False
- # It is in the cache, so lets see if it is going to be
- # fresh enough
- resp = self.serializer.loads(request, self.cache.get(cache_url))
+ # Request allows serving from the cache, let's see if we find something
+ cache_data = self.cache.get(cache_url)
+ if cache_data is None:
+ logger.debug('No cache entry available')
+ return False
- # Check to see if we have a cached object
+ # Check whether it can be deserialized
+ resp = self.serializer.loads(request, cache_data)
if not resp:
+ logger.warning('Cache entry deserialization failed, entry ignored')
return False
# If we have a cached 301, return it immediately. We don't
@@ -114,14 +122,19 @@ class CacheController(object):
# Client can try to refresh the value by repeating the request
# with cache busting headers as usual (ie no-cache).
if resp.status == 301:
+ msg = ('Returning cached "301 Moved Permanently" response '
+ '(ignoring date and etag information)')
+ logger.debug(msg)
return resp
headers = CaseInsensitiveDict(resp.headers)
if not headers or 'date' not in headers:
- # With date or etag, the cached response can never be used
- # and should be deleted.
if 'etag' not in headers:
+ # Without date or etag, the cached response can never be used
+ # and should be deleted.
+ logger.debug('Purging cached response: no date or etag')
self.cache.delete(cache_url)
+ logger.debug('Ignoring cached response: no date')
return False
now = time.time()
@@ -129,6 +142,7 @@ class CacheController(object):
parsedate_tz(headers['date'])
)
current_age = max(0, now - date)
+ logger.debug('Current age based on date: %i', current_age)
# TODO: There is an assumption that the result will be a
# urllib3 response object. This may not be best since we
@@ -142,6 +156,8 @@ class CacheController(object):
# Check the max-age pragma in the cache control header
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
freshness_lifetime = int(resp_cc['max-age'])
+ logger.debug('Freshness lifetime from max-age: %i',
+ freshness_lifetime)
# If there isn't a max-age, check for an expires header
elif 'expires' in headers:
@@ -149,11 +165,16 @@ class CacheController(object):
if expires is not None:
expire_time = calendar.timegm(expires) - date
freshness_lifetime = max(0, expire_time)
+ logger.debug("Freshness lifetime from expires: %i",
+ freshness_lifetime)
- # determine if we are setting freshness limit in the req
+ # Determine if we are setting freshness limit in the
+ # request. Note, this overrides what was in the response.
if 'max-age' in cc:
try:
freshness_lifetime = int(cc['max-age'])
+ logger.debug('Freshness lifetime from request max-age: %i',
+ freshness_lifetime)
except ValueError:
freshness_lifetime = 0
@@ -164,15 +185,20 @@ class CacheController(object):
min_fresh = 0
# adjust our current age by our min fresh
current_age += min_fresh
+ logger.debug('Adjusted current age from min-fresh: %i',
+ current_age)
- # see how fresh we actually are
- fresh = (freshness_lifetime > current_age)
-
- if fresh:
+ # Return entry if it is fresh enough
+ if freshness_lifetime > current_age:
+ logger.debug('The response is "fresh", returning cached response')
+ logger.debug('%i > %i', freshness_lifetime, current_age)
return resp
# we're not fresh. If we don't have an Etag, clear it out
if 'etag' not in headers:
+ logger.debug(
+ 'The cached response is "stale" with no etag, purging'
+ )
self.cache.delete(cache_url)
# return the original handler
@@ -202,23 +228,48 @@ class CacheController(object):
"""
# From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests
- if response.status not in [200, 203, 300, 301]:
+ cacheable_status_codes = [200, 203, 300, 301]
+ if response.status not in cacheable_status_codes:
+ logger.debug(
+ 'Status code %s not in %s',
+ response.status,
+ cacheable_status_codes
+ )
return
response_headers = CaseInsensitiveDict(response.headers)
+ # If we've been given a body, our response has a Content-Length, that
+ # Content-Length is valid then we can check to see if the body we've
+ # been given matches the expected size, and if it doesn't we'll just
+ # skip trying to cache it.
+ if (body is not None and
+ "content-length" in response_headers and
+ response_headers["content-length"].isdigit() and
+ int(response_headers["content-length"]) != len(body)):
+ return
+
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(response_headers)
cache_url = self.cache_url(request.url)
+ logger.debug('Updating cache with response from "%s"', cache_url)
# Delete it from the cache if we happen to have it stored there
- no_store = cc.get('no-store') or cc_req.get('no-store')
+ no_store = False
+ if cc.get('no-store'):
+ no_store = True
+ logger.debug('Response header has "no-store"')
+ if cc_req.get('no-store'):
+ no_store = True
+ logger.debug('Request header has "no-store"')
if no_store and self.cache.get(cache_url):
+ logger.debug('Purging existing cache entry to honor "no-store"')
self.cache.delete(cache_url)
# If we've been given an etag, then keep the response
if self.cache_etags and 'etag' in response_headers:
+ logger.debug('Caching due to etag')
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body=body),
@@ -227,6 +278,7 @@ class CacheController(object):
# Add to the cache any 301s. We do this before looking that
# the Date headers.
elif response.status == 301:
+ logger.debug('Caching permanant redirect')
self.cache.set(
cache_url,
self.serializer.dumps(request, response)
@@ -238,7 +290,8 @@ class CacheController(object):
elif 'date' in response_headers:
# cache when there is a max-age > 0
if cc and cc.get('max-age'):
- if int(cc['max-age']) > 0:
+ if cc['max-age'].isdigit() and int(cc['max-age']) > 0:
+ logger.debug('Caching b/c date exists and max-age > 0')
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body=body),
@@ -248,6 +301,7 @@ class CacheController(object):
# in the meantime.
elif 'expires' in response_headers:
if response_headers['expires']:
+ logger.debug('Caching b/c of expires header')
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body=body),
diff --git a/cachecontrol/filewrapper.py b/cachecontrol/filewrapper.py
index 4b91bce..f1e1ce0 100644
--- a/cachecontrol/filewrapper.py
+++ b/cachecontrol/filewrapper.py
@@ -45,19 +45,34 @@ class CallbackFileWrapper(object):
# TODO: Add some logging here...
return False
+ def _close(self):
+ if self.__callback:
+ self.__callback(self.__buf.getvalue())
+
+ # We assign this to None here, because otherwise we can get into
+ # really tricky problems where the CPython interpreter dead locks
+ # because the callback is holding a reference to something which
+ # has a __del__ method. Setting this to None breaks the cycle
+ # and allows the garbage collector to do it's thing normally.
+ self.__callback = None
+
def read(self, amt=None):
data = self.__fp.read(amt)
self.__buf.write(data)
+ if self.__is_fp_closed():
+ self._close()
+ return data
+
+ def _safe_read(self, amt):
+ data = self.__fp._safe_read(amt)
+ if amt == 2 and data == b'\r\n':
+ # urllib executes this read to toss the CRLF at the end
+ # of the chunk.
+ return data
+
+ self.__buf.write(data)
if self.__is_fp_closed():
- if self.__callback:
- self.__callback(self.__buf.getvalue())
-
- # We assign this to None here, because otherwise we can get into
- # really tricky problems where the CPython interpreter dead locks
- # because the callback is holding a reference to something which
- # has a __del__ method. Setting this to None breaks the cycle
- # and allows the garbage collector to do it's thing normally.
- self.__callback = None
+ self._close()
return data
diff --git a/cachecontrol/heuristics.py b/cachecontrol/heuristics.py
index 01b6314..94715a4 100644
--- a/cachecontrol/heuristics.py
+++ b/cachecontrol/heuristics.py
@@ -40,10 +40,14 @@ class BaseHeuristic(object):
return {}
def apply(self, response):
- warning_header_value = self.warning(response)
- response.headers.update(self.update_headers(response))
- if warning_header_value is not None:
- response.headers.update({'Warning': warning_header_value})
+ updated_headers = self.update_headers(response)
+
+ if updated_headers:
+ response.headers.update(updated_headers)
+ warning_header_value = self.warning(response)
+ if warning_header_value is not None:
+ response.headers.update({'Warning': warning_header_value})
+
return response
diff --git a/cachecontrol/serialize.py b/cachecontrol/serialize.py
index 6b17d80..eb917d7 100644
--- a/cachecontrol/serialize.py
+++ b/cachecontrol/serialize.py
@@ -5,7 +5,7 @@ import zlib
from requests.structures import CaseInsensitiveDict
-from .compat import HTTPResponse, pickle
+from .compat import HTTPResponse, pickle, text_type
def _b64_encode_bytes(b):
@@ -16,6 +16,12 @@ def _b64_encode_str(s):
return _b64_encode_bytes(s.encode("utf8"))
+def _b64_encode(s):
+ if isinstance(s, text_type):
+ return _b64_encode_str(s)
+ return _b64_encode_bytes(s)
+
+
def _b64_decode_bytes(b):
return base64.b64decode(b.encode("ascii"))
@@ -48,7 +54,7 @@ class Serializer(object):
"response": {
"body": _b64_encode_bytes(body),
"headers": dict(
- (_b64_encode_str(k), _b64_encode_str(v))
+ (_b64_encode(k), _b64_encode(v))
for k, v in response.headers.items()
),
"status": response.status,
@@ -69,7 +75,7 @@ class Serializer(object):
# Encode our Vary headers to ensure they can be serialized as JSON
data["vary"] = dict(
- (_b64_encode_str(k), _b64_encode_str(v) if v is not None else v)
+ (_b64_encode(k), _b64_encode(v) if v is not None else v)
for k, v in data["vary"].items()
)
@@ -128,6 +134,12 @@ class Serializer(object):
body_raw = cached["response"].pop("body")
+ headers = CaseInsensitiveDict(data=cached['response']['headers'])
+ if headers.get('transfer-encoding', '') == 'chunked':
+ headers.pop('transfer-encoding')
+
+ cached['response']['headers'] = headers
+
try:
body = io.BytesIO(body_raw)
except TypeError:
diff --git a/setup.cfg b/setup.cfg
index d690f21..73e0b9f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,4 +1,4 @@
-[pytest]
+[tool:pytest]
norecursedirs = bin lib include build
[egg_info]
diff --git a/setup.py b/setup.py
index e04a213..e8d255b 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ import setuptools
long_description = open('README.rst').read()
-VERSION = '0.11.5'
+VERSION = '0.11.7'
setup_params = dict(
name='CacheControl',
@@ -23,6 +23,11 @@ setup_params = dict(
extras_require={
'filecache': ['lockfile>=0.9'],
},
+ entry_points={
+ 'console_scripts': [
+ 'doesitcache = cachecontrol._cmd:main',
+ ]
+ },
classifiers=[
'Development Status :: 4 - Beta',
'Environment :: Web Environment',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-cachecontrol.git
More information about the Python-modules-commits
mailing list