[Python-modules-commits] [python-cachecontrol] 01/03: import python-cachecontrol_0.11.3.orig.tar.gz
Barry Warsaw
barry at moszumanska.debian.org
Fri May 15 22:07:57 UTC 2015
This is an automated email from the git hooks/post-receive script.
barry pushed a commit to branch master
in repository python-cachecontrol.
commit 759025fd9491d3380c8b989dc2eaf675d9647412
Author: Barry Warsaw <barry at python.org>
Date: Fri May 15 17:54:42 2015 -0400
import python-cachecontrol_0.11.3.orig.tar.gz
---
CacheControl.egg-info/PKG-INFO | 64 ++++++
CacheControl.egg-info/SOURCES.txt | 20 ++
CacheControl.egg-info/dependency_links.txt | 1 +
CacheControl.egg-info/requires.txt | 4 +
CacheControl.egg-info/top_level.txt | 1 +
PKG-INFO | 64 ++++++
README.rst | 42 ++++
cachecontrol/__init__.py | 11 ++
cachecontrol/adapter.py | 117 +++++++++++
cachecontrol/cache.py | 39 ++++
cachecontrol/caches/__init__.py | 18 ++
cachecontrol/caches/file_cache.py | 103 ++++++++++
cachecontrol/caches/redis_cache.py | 41 ++++
cachecontrol/compat.py | 23 +++
cachecontrol/controller.py | 299 +++++++++++++++++++++++++++++
cachecontrol/filewrapper.py | 63 ++++++
cachecontrol/heuristics.py | 134 +++++++++++++
cachecontrol/serialize.py | 184 ++++++++++++++++++
cachecontrol/wrapper.py | 21 ++
setup.cfg | 8 +
setup.py | 40 ++++
21 files changed, 1297 insertions(+)
diff --git a/CacheControl.egg-info/PKG-INFO b/CacheControl.egg-info/PKG-INFO
new file mode 100644
index 0000000..2ee2bc4
--- /dev/null
+++ b/CacheControl.egg-info/PKG-INFO
@@ -0,0 +1,64 @@
+Metadata-Version: 1.1
+Name: CacheControl
+Version: 0.11.3
+Summary: httplib2 caching for requests
+Home-page: https://github.com/ionrock/cachecontrol
+Author: Eric Larson
+Author-email: eric at ionrock.org
+License: UNKNOWN
+Description: ==============
+ CacheControl
+ ==============
+
+ .. image:: https://pypip.in/version/cachecontrol/badge.svg
+ :target: https://pypi.python.org/pypi/cachecontrol/
+ :alt: Latest Version
+
+ .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
+ :target: https://travis-ci.org/ionrock/cachecontrol
+
+ CacheControl is a port of the caching algorithms in httplib2_ for use with
+ requests_ session object.
+
+ It was written because httplib2's better support for caching is often
+ mitigated by its lack of threadsafety. The same is true of requests in
+ terms of caching.
+
+
+ Quickstart
+ ==========
+
+ .. code-block:: python
+
+ import requests
+
+ from cachecontrol import CacheControl
+
+
+ sess = requests.session()
+ cached_sess = CacheControl(sess)
+
+ response = cached_sess.get('http://google.com')
+
+ If the URL contains any caching based headers, it will cache the
+ result in a simple dictionary.
+
+ For more info, check out the docs_
+
+ .. _docs: http://cachecontrol.readthedocs.org/en/latest/
+ .. _httplib2: https://github.com/jcgregorio/httplib2
+ .. _requests: http://docs.python-requests.org/
+
+Keywords: requests http caching web
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Web Environment
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.2
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Topic :: Internet :: WWW/HTTP
diff --git a/CacheControl.egg-info/SOURCES.txt b/CacheControl.egg-info/SOURCES.txt
new file mode 100644
index 0000000..61a3577
--- /dev/null
+++ b/CacheControl.egg-info/SOURCES.txt
@@ -0,0 +1,20 @@
+README.rst
+setup.cfg
+setup.py
+CacheControl.egg-info/PKG-INFO
+CacheControl.egg-info/SOURCES.txt
+CacheControl.egg-info/dependency_links.txt
+CacheControl.egg-info/requires.txt
+CacheControl.egg-info/top_level.txt
+cachecontrol/__init__.py
+cachecontrol/adapter.py
+cachecontrol/cache.py
+cachecontrol/compat.py
+cachecontrol/controller.py
+cachecontrol/filewrapper.py
+cachecontrol/heuristics.py
+cachecontrol/serialize.py
+cachecontrol/wrapper.py
+cachecontrol/caches/__init__.py
+cachecontrol/caches/file_cache.py
+cachecontrol/caches/redis_cache.py
\ No newline at end of file
diff --git a/CacheControl.egg-info/dependency_links.txt b/CacheControl.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/CacheControl.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/CacheControl.egg-info/requires.txt b/CacheControl.egg-info/requires.txt
new file mode 100644
index 0000000..f330760
--- /dev/null
+++ b/CacheControl.egg-info/requires.txt
@@ -0,0 +1,4 @@
+requests
+
+[filecache]
+lockfile
diff --git a/CacheControl.egg-info/top_level.txt b/CacheControl.egg-info/top_level.txt
new file mode 100644
index 0000000..af37ac6
--- /dev/null
+++ b/CacheControl.egg-info/top_level.txt
@@ -0,0 +1 @@
+cachecontrol
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..2ee2bc4
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,64 @@
+Metadata-Version: 1.1
+Name: CacheControl
+Version: 0.11.3
+Summary: httplib2 caching for requests
+Home-page: https://github.com/ionrock/cachecontrol
+Author: Eric Larson
+Author-email: eric at ionrock.org
+License: UNKNOWN
+Description: ==============
+ CacheControl
+ ==============
+
+ .. image:: https://pypip.in/version/cachecontrol/badge.svg
+ :target: https://pypi.python.org/pypi/cachecontrol/
+ :alt: Latest Version
+
+ .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
+ :target: https://travis-ci.org/ionrock/cachecontrol
+
+ CacheControl is a port of the caching algorithms in httplib2_ for use with
+ requests_ session object.
+
+ It was written because httplib2's better support for caching is often
+ mitigated by its lack of threadsafety. The same is true of requests in
+ terms of caching.
+
+
+ Quickstart
+ ==========
+
+ .. code-block:: python
+
+ import requests
+
+ from cachecontrol import CacheControl
+
+
+ sess = requests.session()
+ cached_sess = CacheControl(sess)
+
+ response = cached_sess.get('http://google.com')
+
+ If the URL contains any caching based headers, it will cache the
+ result in a simple dictionary.
+
+ For more info, check out the docs_
+
+ .. _docs: http://cachecontrol.readthedocs.org/en/latest/
+ .. _httplib2: https://github.com/jcgregorio/httplib2
+ .. _requests: http://docs.python-requests.org/
+
+Keywords: requests http caching web
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Web Environment
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.2
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Topic :: Internet :: WWW/HTTP
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..1e790a0
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,42 @@
+==============
+ CacheControl
+==============
+
+.. image:: https://pypip.in/version/cachecontrol/badge.svg
+ :target: https://pypi.python.org/pypi/cachecontrol/
+ :alt: Latest Version
+
+.. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
+ :target: https://travis-ci.org/ionrock/cachecontrol
+
+CacheControl is a port of the caching algorithms in httplib2_ for use with
+requests_ session object.
+
+It was written because httplib2's better support for caching is often
+mitigated by its lack of threadsafety. The same is true of requests in
+terms of caching.
+
+
+Quickstart
+==========
+
+.. code-block:: python
+
+ import requests
+
+ from cachecontrol import CacheControl
+
+
+ sess = requests.session()
+ cached_sess = CacheControl(sess)
+
+ response = cached_sess.get('http://google.com')
+
+If the URL contains any caching based headers, it will cache the
+result in a simple dictionary.
+
+For more info, check out the docs_
+
+.. _docs: http://cachecontrol.readthedocs.org/en/latest/
+.. _httplib2: https://github.com/jcgregorio/httplib2
+.. _requests: http://docs.python-requests.org/
diff --git a/cachecontrol/__init__.py b/cachecontrol/__init__.py
new file mode 100644
index 0000000..4cb8ff5
--- /dev/null
+++ b/cachecontrol/__init__.py
@@ -0,0 +1,11 @@
+"""CacheControl import Interface.
+
+Make it easy to import from cachecontrol without long namespaces.
+"""
+__author__ = 'Eric Larson'
+__email__ = 'eric at ionrock.org'
+__version__ = '0.11.3'
+
+from .wrapper import CacheControl
+from .adapter import CacheControlAdapter
+from .controller import CacheController
diff --git a/cachecontrol/adapter.py b/cachecontrol/adapter.py
new file mode 100644
index 0000000..54f1b51
--- /dev/null
+++ b/cachecontrol/adapter.py
@@ -0,0 +1,117 @@
+import functools
+
+from requests.adapters import HTTPAdapter
+
+from .controller import CacheController
+from .cache import DictCache
+from .filewrapper import CallbackFileWrapper
+
+
class CacheControlAdapter(HTTPAdapter):
    """Transport adapter that adds HTTP caching to a requests Session.

    GET requests are answered from ``self.cache`` when a fresh entry
    exists; responses are stored back through a CacheController.
    Successful PUT/DELETE requests evict the cached entry for the URL.
    """

    # Methods whose successful response invalidates the cached entry.
    invalidating_methods = set(['PUT', 'DELETE'])

    def __init__(self, cache=None,
                 cache_etags=True,
                 controller_class=None,
                 serializer=None,
                 heuristic=None,
                 *args, **kw):
        # cache: storage backend (defaults to an in-memory DictCache).
        # cache_etags: whether responses carrying an ETag are cached.
        # controller_class: alternate CacheController implementation.
        # serializer: alternate (de)serializer for cached responses.
        # heuristic: optional freshness heuristic applied before caching.
        super(CacheControlAdapter, self).__init__(*args, **kw)
        self.cache = cache or DictCache()
        self.heuristic = heuristic

        controller_factory = controller_class or CacheController
        self.controller = controller_factory(
            self.cache,
            cache_etags=cache_etags,
            serializer=serializer,
        )

    def send(self, request, **kw):
        """
        Send a request. Use the request information to see if it
        exists in the cache and cache the response if we need to and can.
        """
        if request.method == 'GET':
            cached_response = self.controller.cached_request(request)
            if cached_response:
                # Fresh cached entry: answer without touching the network.
                return self.build_response(request, cached_response,
                                           from_cache=True)

            # check for etags and add headers if appropriate
            # (If-None-Match / If-Modified-Since for a stale cached entry.)
            request.headers.update(
                self.controller.conditional_headers(request)
            )

        resp = super(CacheControlAdapter, self).send(request, **kw)

        return resp

    def build_response(self, request, response, from_cache=False):
        """
        Build a response by making a request or using the cache.

        This will end up calling send and returning a potentially
        cached response
        """
        if not from_cache and request.method == 'GET':

            # apply any expiration heuristics
            if response.status == 304:
                # We must have sent an ETag request. This could mean
                # that we've been expired already or that we simply
                # have an etag. In either case, we want to try and
                # update the cache if that is the case.
                cached_response = self.controller.update_cached_response(
                    request, response
                )

                if cached_response is not response:
                    from_cache = True

                # We are done with the server response, read a
                # possible response body (compliant servers will
                # not return one, but we cannot be 100% sure) and
                # release the connection back to the pool.
                response.read(decode_content=False)
                response.release_conn()

                response = cached_response

            # We always cache the 301 responses
            elif response.status == 301:
                self.controller.cache_response(request, response)
            else:
                # Check for any heuristics that might update headers
                # before trying to cache.
                if self.heuristic:
                    response = self.heuristic.apply(response)

                # Wrap the response file with a wrapper that will cache the
                # response when the stream has been consumed.
                # (The body is only fully known once the caller reads it.)
                response._fp = CallbackFileWrapper(
                    response._fp,
                    functools.partial(
                        self.controller.cache_response,
                        request,
                        response,
                    )
                )

        resp = super(CacheControlAdapter, self).build_response(
            request, response
        )

        # See if we should invalidate the cache.
        if request.method in self.invalidating_methods and resp.ok:
            cache_url = self.controller.cache_url(request.url)
            self.cache.delete(cache_url)

        # Give the request a from_cache attr to let people use it
        resp.from_cache = from_cache

        return resp

    def close(self):
        # Let the backend release any file handles / connections it holds.
        self.cache.close()
        super(CacheControlAdapter, self).close()
diff --git a/cachecontrol/cache.py b/cachecontrol/cache.py
new file mode 100644
index 0000000..7389a73
--- /dev/null
+++ b/cachecontrol/cache.py
@@ -0,0 +1,39 @@
+"""
+The cache object API for implementing caches. The default is a thread
+safe in-memory dictionary.
+"""
+from threading import Lock
+
+
class BaseCache(object):
    """Abstract interface for cache storage backends.

    Subclasses must implement get/set/delete; close() is optional and
    is a no-op by default.
    """

    def get(self, key):
        """Return the cached value for *key*, or None if absent."""
        # BUGFIX: the original raised NotImplemented() -- the
        # NotImplemented singleton is not callable, so callers got a
        # TypeError instead of the intended NotImplementedError.
        raise NotImplementedError()

    def set(self, key, value):
        """Store *value* under *key*."""
        raise NotImplementedError()

    def delete(self, key):
        """Remove the entry for *key* (a no-op if absent is preferred)."""
        raise NotImplementedError()

    def close(self):
        # Backends holding external resources (files, sockets) override this.
        pass


class DictCache(BaseCache):
    """Thread-safe in-memory cache backed by a plain dict."""

    def __init__(self, init_dict=None):
        # The lock only guards mutation; reads go straight to the dict.
        self.lock = Lock()
        self.data = init_dict or {}

    def get(self, key):
        return self.data.get(key, None)

    def set(self, key, value):
        with self.lock:
            self.data.update({key: value})

    def delete(self, key):
        with self.lock:
            if key in self.data:
                self.data.pop(key)
diff --git a/cachecontrol/caches/__init__.py b/cachecontrol/caches/__init__.py
new file mode 100644
index 0000000..f9e66a1
--- /dev/null
+++ b/cachecontrol/caches/__init__.py
@@ -0,0 +1,18 @@
+from textwrap import dedent
+
+try:
+ from .file_cache import FileCache
+except ImportError:
+ notice = dedent('''
+ NOTE: In order to use the FileCache you must have
+ lockfile installed. You can install it via pip:
+ pip install lockfile
+ ''')
+ print(notice)
+
+
+try:
+ import redis
+ from .redis_cache import RedisCache
+except ImportError:
+ pass
diff --git a/cachecontrol/caches/file_cache.py b/cachecontrol/caches/file_cache.py
new file mode 100644
index 0000000..fd12c8a
--- /dev/null
+++ b/cachecontrol/caches/file_cache.py
@@ -0,0 +1,103 @@
+import hashlib
+import os
+
+from lockfile import FileLock
+
+from ..cache import BaseCache
+from ..controller import CacheController
+
+
+def _secure_open_write(filename, fmode):
+ # We only want to write to this file, so open it in write only mode
+ flags = os.O_WRONLY
+
+ # os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only
+ # will open *new* files.
+ # We specify this because we want to ensure that the mode we pass is the
+ # mode of the file.
+ flags |= os.O_CREAT | os.O_EXCL
+
+ # Do not follow symlinks to prevent someone from making a symlink that
+ # we follow and insecurely open a cache file.
+ if hasattr(os, "O_NOFOLLOW"):
+ flags |= os.O_NOFOLLOW
+
+ # On Windows we'll mark this file as binary
+ if hasattr(os, "O_BINARY"):
+ flags |= os.O_BINARY
+
+ # Before we open our file, we want to delete any existing file that is
+ # there
+ try:
+ os.remove(filename)
+ except (IOError, OSError):
+ # The file must not exist already, so we can just skip ahead to opening
+ pass
+
+ # Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
+ # race condition happens between the os.remove and this line, that an
+ # error will be raised. Because we utilize a lockfile this should only
+ # happen if someone is attempting to attack us.
+ fd = os.open(filename, flags, fmode)
+ try:
+ return os.fdopen(fd, "wb")
+ except:
+ # An error occurred wrapping our FD in a file object
+ os.close(fd)
+ raise
+
+
class FileCache(BaseCache):
    """Cache backend that stores each entry as a file on disk.

    Keys are hashed and sharded into sub-directories so no single
    directory grows unboundedly.
    """

    def __init__(self, directory, forever=False, filemode=0o0600,
                 dirmode=0o0700):
        # directory: root directory for cache files.
        # forever: when True, delete() never removes entries.
        # filemode/dirmode: permissions used for created files/dirs.
        self.directory = directory
        self.forever = forever
        self.filemode = filemode
        self.dirmode = dirmode

    @staticmethod
    def encode(x):
        """Hash *x* into the hex digest used as the on-disk name."""
        return hashlib.sha224(x.encode()).hexdigest()

    def _fn(self, name):
        """Map a cache key to its file path under ``self.directory``.

        NOTE: This method should not change as some may depend on it.
        See: https://github.com/ionrock/cachecontrol/issues/63
        """
        hashed = self.encode(name)
        # Shard by the first five hex chars to keep directories small.
        parts = list(hashed[:5]) + [hashed]
        return os.path.join(self.directory, *parts)

    def get(self, key):
        """Return the cached bytes for *key*, or None if not present."""
        name = self._fn(key)
        if not os.path.exists(name):
            return None

        with open(name, 'rb') as fh:
            return fh.read()

    def set(self, key, value):
        """Store *value* (bytes) for *key*, creating directories as needed."""
        name = self._fn(key)

        # Make sure the directory exists
        try:
            os.makedirs(os.path.dirname(name), self.dirmode)
        except (IOError, OSError):
            pass

        with FileLock(name) as lock:
            # Write our actual file
            with _secure_open_write(lock.path, self.filemode) as fh:
                fh.write(value)

    def delete(self, key):
        """Remove the entry for *key* unless the cache is permanent."""
        name = self._fn(key)
        if not self.forever:
            # BUGFIX: callers (e.g. the adapter invalidating on
            # PUT/DELETE) delete URLs that may never have been cached;
            # a missing file must not raise.
            try:
                os.remove(name)
            except (IOError, OSError):
                pass
+
+
def url_to_file_path(url, filecache):
    """Return the file cache path based on the URL.

    This does not ensure the file exists!
    """
    # Normalize the URL exactly the way the cache does, then ask the
    # FileCache where that key would live on disk.
    return filecache._fn(CacheController.cache_url(url))
diff --git a/cachecontrol/caches/redis_cache.py b/cachecontrol/caches/redis_cache.py
new file mode 100644
index 0000000..9f5d55f
--- /dev/null
+++ b/cachecontrol/caches/redis_cache.py
@@ -0,0 +1,41 @@
+from __future__ import division
+
+from datetime import datetime
+
+
def total_seconds(td):
    """Return the duration of timedelta *td* in (float) seconds.

    Python 2.6 compatibility shim: delegates to the real
    ``timedelta.total_seconds`` when available, otherwise computes it.
    """
    if hasattr(td, 'total_seconds'):
        return td.total_seconds()

    # Fold days into seconds, then express everything in microseconds
    # before dividing back down to get a float result.
    whole_seconds = td.days * 24 * 3600 + td.seconds
    return (td.microseconds + whole_seconds * 10**6) / 10**6
+
+
class RedisCache(object):
    """Cache backend that stores entries in a Redis connection."""

    def __init__(self, conn):
        # conn: a redis client exposing get/set/setex/delete/keys.
        self.conn = conn

    def get(self, key):
        return self.conn.get(key)

    def set(self, key, value, expires=None):
        if expires:
            # *expires* is an absolute datetime; convert it to a TTL
            # relative to now for Redis' SETEX.
            ttl = expires - datetime.now()
            self.conn.setex(key, total_seconds(ttl), value)
        else:
            self.conn.set(key, value)

    def delete(self, key):
        self.conn.delete(key)

    def clear(self):
        """Remove every key in the database. Use with caution!"""
        for key in self.conn.keys():
            self.conn.delete(key)

    def close(self):
        self.conn.disconnect()
diff --git a/cachecontrol/compat.py b/cachecontrol/compat.py
new file mode 100644
index 0000000..489eb86
--- /dev/null
+++ b/cachecontrol/compat.py
@@ -0,0 +1,23 @@
+try:
+ from urllib.parse import urljoin
+except ImportError:
+ from urlparse import urljoin
+
+
+try:
+ import cPickle as pickle
+except ImportError:
+ import pickle
+
+
+# Handle the case where the requests module has been patched to not have
+# urllib3 bundled as part of its source.
+try:
+ from requests.packages.urllib3.response import HTTPResponse
+except ImportError:
+ from urllib3.response import HTTPResponse
+
+try:
+ from requests.packages.urllib3.util import is_fp_closed
+except ImportError:
+ from urllib3.util import is_fp_closed
diff --git a/cachecontrol/controller.py b/cachecontrol/controller.py
new file mode 100644
index 0000000..f038074
--- /dev/null
+++ b/cachecontrol/controller.py
@@ -0,0 +1,299 @@
+"""
+The httplib2 algorithms ported for use with requests.
+"""
+import re
+import calendar
+import time
+from email.utils import parsedate_tz
+
+from requests.structures import CaseInsensitiveDict
+
+from .cache import DictCache
+from .serialize import Serializer
+
+
# Appendix B of RFC 3986: a permissive regex that splits any URI
# reference into its five components.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)
    """
    # The regex captures nine groups; the even-numbered ones include
    # the delimiters, so unpack and keep only the component values.
    groups = URI.match(uri).groups()
    (_, scheme, _, authority, path, _, query, _, fragment) = groups
    return (scheme, authority, path, query, fragment)
+
+
class CacheController(object):
    """An interface to see if request should cached or not.

    Ports httplib2's caching algorithms for use with requests-style
    request objects and urllib3-style responses.
    """

    def __init__(self, cache=None, cache_etags=True, serializer=None):
        # cache: storage backend (defaults to in-memory DictCache).
        # cache_etags: when True, responses carrying an ETag are cached.
        # serializer: converts responses to/from the cached byte form.
        self.cache = cache or DictCache()
        self.cache_etags = cache_etags
        self.serializer = serializer or Serializer()

    @classmethod
    def _urlnorm(cls, uri):
        """Normalize the URL to create a safe key for the cache"""
        (scheme, authority, path, query, fragment) = parse_uri(uri)
        if not scheme or not authority:
            raise Exception("Only absolute URIs are allowed. uri = %s" % uri)

        scheme = scheme.lower()
        authority = authority.lower()

        if not path:
            path = "/"

        # Could do syntax based normalization of the URI before
        # computing the digest. See Section 6.2.2 of Std 66.
        request_uri = query and "?".join([path, query]) or path
        defrag_uri = scheme + "://" + authority + request_uri

        return defrag_uri

    @classmethod
    def cache_url(cls, uri):
        """Return the normalized cache key for *uri*."""
        return cls._urlnorm(uri)

    def parse_cache_control(self, headers):
        """
        Parse the cache control headers returning a dictionary with values
        for the different directives.

        Directives with an argument map to their lower-cased *string*
        value; bare directives map to the int 1.
        """
        retval = {}

        cc_header = 'cache-control'
        if 'Cache-Control' in headers:
            cc_header = 'Cache-Control'

        if cc_header in headers:
            parts = headers[cc_header].split(',')
            parts_with_args = [
                tuple([x.strip().lower() for x in part.split("=", 1)])
                for part in parts if -1 != part.find("=")
            ]
            parts_wo_args = [
                (name.strip().lower(), 1)
                for name in parts if -1 == name.find("=")
            ]
            retval = dict(parts_with_args + parts_wo_args)
        return retval

    def cached_request(self, request):
        """
        Return a cached response if it exists in the cache, otherwise
        return False.
        """
        cache_url = self.cache_url(request.url)
        cc = self.parse_cache_control(request.headers)

        # non-caching states
        no_cache = True if 'no-cache' in cc else False
        if 'max-age' in cc:
            # BUGFIX: directive values are parsed as strings, so a
            # comparison of cc['max-age'] against the int 0 was never
            # true and "max-age=0" failed to disable caching.  Convert
            # before comparing; ignore a malformed value.
            try:
                if int(cc['max-age']) == 0:
                    no_cache = True
            except ValueError:
                pass

        # Bail out if no-cache was set
        if no_cache:
            return False

        # It is in the cache, so lets see if it is going to be
        # fresh enough
        resp = self.serializer.loads(request, self.cache.get(cache_url))

        # Check to see if we have a cached object
        if not resp:
            return False

        # If we have a cached 301, return it immediately. We don't
        # need to test our response for other headers b/c it is
        # intrinsically "cacheable" as it is Permanent.
        # See:
        #   https://tools.ietf.org/html/rfc7231#section-6.4.2
        #
        # Client can try to refresh the value by repeating the request
        # with cache busting headers as usual (ie no-cache).
        if resp.status == 301:
            return resp

        headers = CaseInsensitiveDict(resp.headers)
        if not headers or 'date' not in headers:
            # Without a date or etag, the cached response can never be
            # used and should be deleted.
            if 'etag' not in headers:
                self.cache.delete(cache_url)
            return False

        now = time.time()
        date = calendar.timegm(
            parsedate_tz(headers['date'])
        )
        current_age = max(0, now - date)

        # TODO: There is an assumption that the result will be a
        # urllib3 response object. This may not be best since we
        # could probably avoid instantiating or constructing the
        # response until we know we need it.
        resp_cc = self.parse_cache_control(headers)

        # determine freshness
        freshness_lifetime = 0

        # Check the max-age pragma in the cache control header
        if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
            freshness_lifetime = int(resp_cc['max-age'])

        # If there isn't a max-age, check for an expires header
        elif 'expires' in headers:
            expires = parsedate_tz(headers['expires'])
            if expires is not None:
                expire_time = calendar.timegm(expires) - date
                freshness_lifetime = max(0, expire_time)

        # determine if we are setting freshness limit in the req
        # (the request's own max-age overrides the response's)
        if 'max-age' in cc:
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0

        if 'min-fresh' in cc:
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            # adjust our current age by our min fresh
            current_age += min_fresh

        # see how fresh we actually are
        fresh = (freshness_lifetime > current_age)

        if fresh:
            return resp

        # we're not fresh. If we don't have an Etag, clear it out
        if 'etag' not in headers:
            self.cache.delete(cache_url)

        # return the original handler
        return False

    def conditional_headers(self, request):
        """Return validation headers (If-None-Match / If-Modified-Since)
        derived from a stale cached response, or an empty dict."""
        cache_url = self.cache_url(request.url)
        resp = self.serializer.loads(request, self.cache.get(cache_url))
        new_headers = {}

        if resp:
            headers = CaseInsensitiveDict(resp.headers)

            if 'etag' in headers:
                new_headers['If-None-Match'] = headers['ETag']

            if 'last-modified' in headers:
                new_headers['If-Modified-Since'] = headers['Last-Modified']

        return new_headers

    def cache_response(self, request, response, body=None):
        """
        Algorithm for caching requests.

        This assumes a requests Response object.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        # handle byte range requests
        if response.status not in [200, 203, 300, 301]:
            return

        response_headers = CaseInsensitiveDict(response.headers)

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(response_headers)

        cache_url = self.cache_url(request.url)

        # Delete it from the cache if we happen to have it stored there
        no_store = cc.get('no-store') or cc_req.get('no-store')
        if no_store and self.cache.get(cache_url):
            self.cache.delete(cache_url)

        # If we've been given an etag, then keep the response
        if self.cache_etags and 'etag' in response_headers:
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response, body=body),
            )

        # Add to the cache any 301s. We do this before looking that
        # the Date headers.
        elif response.status == 301:
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response)
            )

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif 'date' in response_headers:
            # cache when there is a max-age > 0
            if cc and cc.get('max-age'):
                # ROBUSTNESS: guard the int() conversion so a malformed
                # max-age directive skips caching instead of raising
                # ValueError (matching the isdigit check used above).
                if cc['max-age'].isdigit() and int(cc['max-age']) > 0:
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif 'expires' in response_headers:
                if response_headers['expires']:
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

    def update_cached_response(self, request, response):
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response.
        """
        cache_url = self.cache_url(request.url)

        cached_response = self.serializer.loads(
            request,
            self.cache.get(cache_url)
        )

        if not cached_response:
            # we didn't have a cached response
            return response

        # Lets update our headers with the headers from the new request:
        # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
        #
        # The server isn't supposed to send headers that would make
        # the cached body invalid. But... just in case, we'll be sure
        # to strip out ones we know that might be problematic due to
        # typical assumptions.
        excluded_headers = [
            "content-length",
        ]

        cached_response.headers.update(
            dict((k, v) for k, v in response.headers.items()
                 if k.lower() not in excluded_headers)
        )

        # we want a 200 b/c we have content via the cache
        cached_response.status = 200

        # update our cache
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, cached_response),
        )

        return cached_response
diff --git a/cachecontrol/filewrapper.py b/cachecontrol/filewrapper.py
new file mode 100644
index 0000000..4b91bce
--- /dev/null
+++ b/cachecontrol/filewrapper.py
@@ -0,0 +1,63 @@
+from io import BytesIO
+
+
+class CallbackFileWrapper(object):
+ """
+ Small wrapper around a fp object which will tee everything read into a
+ buffer, and when that file is closed it will execute a callback with the
+ contents of that buffer.
+
+ All attributes are proxied to the underlying file object.
+
+ This class uses members with a double underscore (__) leading prefix so as
+ not to accidentally shadow an attribute.
+ """
+
+ def __init__(self, fp, callback):
+ self.__buf = BytesIO()
+ self.__fp = fp
+ self.__callback = callback
+
+ def __getattr__(self, name):
+ # The vaguaries of garbage collection means that self.__fp is
+ # not always set. By using __getattribute__ and the private
+ # name[0] allows looking up the attribute value and raising an
+ # AttributeError when it doesn't exist. This stop thigns from
+ # infinitely recursing calls to getattr in the case where
+ # self.__fp hasn't been set.
... 453 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-cachecontrol.git
More information about the Python-modules-commits
mailing list