[Python-modules-commits] [python-cachecontrol] 01/03: import python-cachecontrol_0.11.3.orig.tar.gz

Barry Warsaw barry at moszumanska.debian.org
Fri May 15 22:07:57 UTC 2015


This is an automated email from the git hooks/post-receive script.

barry pushed a commit to branch master
in repository python-cachecontrol.

commit 759025fd9491d3380c8b989dc2eaf675d9647412
Author: Barry Warsaw <barry at python.org>
Date:   Fri May 15 17:54:42 2015 -0400

    import python-cachecontrol_0.11.3.orig.tar.gz
---
 CacheControl.egg-info/PKG-INFO             |  64 ++++++
 CacheControl.egg-info/SOURCES.txt          |  20 ++
 CacheControl.egg-info/dependency_links.txt |   1 +
 CacheControl.egg-info/requires.txt         |   4 +
 CacheControl.egg-info/top_level.txt        |   1 +
 PKG-INFO                                   |  64 ++++++
 README.rst                                 |  42 ++++
 cachecontrol/__init__.py                   |  11 ++
 cachecontrol/adapter.py                    | 117 +++++++++++
 cachecontrol/cache.py                      |  39 ++++
 cachecontrol/caches/__init__.py            |  18 ++
 cachecontrol/caches/file_cache.py          | 103 ++++++++++
 cachecontrol/caches/redis_cache.py         |  41 ++++
 cachecontrol/compat.py                     |  23 +++
 cachecontrol/controller.py                 | 299 +++++++++++++++++++++++++++++
 cachecontrol/filewrapper.py                |  63 ++++++
 cachecontrol/heuristics.py                 | 134 +++++++++++++
 cachecontrol/serialize.py                  | 184 ++++++++++++++++++
 cachecontrol/wrapper.py                    |  21 ++
 setup.cfg                                  |   8 +
 setup.py                                   |  40 ++++
 21 files changed, 1297 insertions(+)

diff --git a/CacheControl.egg-info/PKG-INFO b/CacheControl.egg-info/PKG-INFO
new file mode 100644
index 0000000..2ee2bc4
--- /dev/null
+++ b/CacheControl.egg-info/PKG-INFO
@@ -0,0 +1,64 @@
+Metadata-Version: 1.1
+Name: CacheControl
+Version: 0.11.3
+Summary: httplib2 caching for requests
+Home-page: https://github.com/ionrock/cachecontrol
+Author: Eric Larson
+Author-email: eric at ionrock.org
+License: UNKNOWN
+Description: ==============
+         CacheControl
+        ==============
+        
+        .. image:: https://pypip.in/version/cachecontrol/badge.svg
+            :target: https://pypi.python.org/pypi/cachecontrol/
+            :alt: Latest Version
+        
+        .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
+          :target: https://travis-ci.org/ionrock/cachecontrol
+        
+        CacheControl is a port of the caching algorithms in httplib2_ for use with
+        requests_ session object.
+        
+        It was written because httplib2's better support for caching is often
+        mitigated by its lack of threadsafety. The same is true of requests in
+        terms of caching.
+        
+        
+        Quickstart
+        ==========
+        
+        .. code-block:: python
+        
+          import requests
+        
+          from cachecontrol import CacheControl
+        
+        
+          sess = requests.session()
+          cached_sess = CacheControl(sess)
+        
+          response = cached_sess.get('http://google.com')
+        
+        If the URL contains any caching based headers, it will cache the
+        result in a simple dictionary.
+        
+        For more info, check out the docs_
+        
+        .. _docs: http://cachecontrol.readthedocs.org/en/latest/
+        .. _httplib2: https://github.com/jcgregorio/httplib2
+        .. _requests: http://docs.python-requests.org/
+        
+Keywords: requests http caching web
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Web Environment
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.2
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Topic :: Internet :: WWW/HTTP
diff --git a/CacheControl.egg-info/SOURCES.txt b/CacheControl.egg-info/SOURCES.txt
new file mode 100644
index 0000000..61a3577
--- /dev/null
+++ b/CacheControl.egg-info/SOURCES.txt
@@ -0,0 +1,20 @@
+README.rst
+setup.cfg
+setup.py
+CacheControl.egg-info/PKG-INFO
+CacheControl.egg-info/SOURCES.txt
+CacheControl.egg-info/dependency_links.txt
+CacheControl.egg-info/requires.txt
+CacheControl.egg-info/top_level.txt
+cachecontrol/__init__.py
+cachecontrol/adapter.py
+cachecontrol/cache.py
+cachecontrol/compat.py
+cachecontrol/controller.py
+cachecontrol/filewrapper.py
+cachecontrol/heuristics.py
+cachecontrol/serialize.py
+cachecontrol/wrapper.py
+cachecontrol/caches/__init__.py
+cachecontrol/caches/file_cache.py
+cachecontrol/caches/redis_cache.py
\ No newline at end of file
diff --git a/CacheControl.egg-info/dependency_links.txt b/CacheControl.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/CacheControl.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/CacheControl.egg-info/requires.txt b/CacheControl.egg-info/requires.txt
new file mode 100644
index 0000000..f330760
--- /dev/null
+++ b/CacheControl.egg-info/requires.txt
@@ -0,0 +1,4 @@
+requests
+
+[filecache]
+lockfile
diff --git a/CacheControl.egg-info/top_level.txt b/CacheControl.egg-info/top_level.txt
new file mode 100644
index 0000000..af37ac6
--- /dev/null
+++ b/CacheControl.egg-info/top_level.txt
@@ -0,0 +1 @@
+cachecontrol
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..2ee2bc4
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,64 @@
+Metadata-Version: 1.1
+Name: CacheControl
+Version: 0.11.3
+Summary: httplib2 caching for requests
+Home-page: https://github.com/ionrock/cachecontrol
+Author: Eric Larson
+Author-email: eric at ionrock.org
+License: UNKNOWN
+Description: ==============
+         CacheControl
+        ==============
+        
+        .. image:: https://pypip.in/version/cachecontrol/badge.svg
+            :target: https://pypi.python.org/pypi/cachecontrol/
+            :alt: Latest Version
+        
+        .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
+          :target: https://travis-ci.org/ionrock/cachecontrol
+        
+        CacheControl is a port of the caching algorithms in httplib2_ for use with
+        requests_ session object.
+        
+        It was written because httplib2's better support for caching is often
+        mitigated by its lack of threadsafety. The same is true of requests in
+        terms of caching.
+        
+        
+        Quickstart
+        ==========
+        
+        .. code-block:: python
+        
+          import requests
+        
+          from cachecontrol import CacheControl
+        
+        
+          sess = requests.session()
+          cached_sess = CacheControl(sess)
+        
+          response = cached_sess.get('http://google.com')
+        
+        If the URL contains any caching based headers, it will cache the
+        result in a simple dictionary.
+        
+        For more info, check out the docs_
+        
+        .. _docs: http://cachecontrol.readthedocs.org/en/latest/
+        .. _httplib2: https://github.com/jcgregorio/httplib2
+        .. _requests: http://docs.python-requests.org/
+        
+Keywords: requests http caching web
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Web Environment
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.2
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Topic :: Internet :: WWW/HTTP
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..1e790a0
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,42 @@
+==============
+ CacheControl
+==============
+
+.. image:: https://pypip.in/version/cachecontrol/badge.svg
+    :target: https://pypi.python.org/pypi/cachecontrol/
+    :alt: Latest Version
+
+.. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
+  :target: https://travis-ci.org/ionrock/cachecontrol
+
+CacheControl is a port of the caching algorithms in httplib2_ for use with
+requests_ session object.
+
+It was written because httplib2's better support for caching is often
+mitigated by its lack of threadsafety. The same is true of requests in
+terms of caching.
+
+
+Quickstart
+==========
+
+.. code-block:: python
+
+  import requests
+
+  from cachecontrol import CacheControl
+
+
+  sess = requests.session()
+  cached_sess = CacheControl(sess)
+
+  response = cached_sess.get('http://google.com')
+
+If the URL contains any caching based headers, it will cache the
+result in a simple dictionary.
+
+For more info, check out the docs_
+
+.. _docs: http://cachecontrol.readthedocs.org/en/latest/
+.. _httplib2: https://github.com/jcgregorio/httplib2
+.. _requests: http://docs.python-requests.org/
diff --git a/cachecontrol/__init__.py b/cachecontrol/__init__.py
new file mode 100644
index 0000000..4cb8ff5
--- /dev/null
+++ b/cachecontrol/__init__.py
@@ -0,0 +1,11 @@
+"""CacheControl import Interface.
+
+Make it easy to import from cachecontrol without long namespaces.
+"""
+__author__ = 'Eric Larson'
+__email__ = 'eric at ionrock.org'
+__version__ = '0.11.3'
+
+from .wrapper import CacheControl
+from .adapter import CacheControlAdapter
+from .controller import CacheController
diff --git a/cachecontrol/adapter.py b/cachecontrol/adapter.py
new file mode 100644
index 0000000..54f1b51
--- /dev/null
+++ b/cachecontrol/adapter.py
@@ -0,0 +1,117 @@
+import functools
+
+from requests.adapters import HTTPAdapter
+
+from .controller import CacheController
+from .cache import DictCache
+from .filewrapper import CallbackFileWrapper
+
+
class CacheControlAdapter(HTTPAdapter):
    """Transport adapter that adds HTTP caching to a requests Session.

    GET requests are answered from the configured cache when the
    controller deems the stored response usable; fresh responses are
    handed to the controller for caching.
    """
    # Methods whose successful completion invalidates any cached entry
    # for the same URL.
    invalidating_methods = set(['PUT', 'DELETE'])

    def __init__(self, cache=None,
                 cache_etags=True,
                 controller_class=None,
                 serializer=None,
                 heuristic=None,
                 *args, **kw):
        # cache: BaseCache-style backend; defaults to in-memory DictCache.
        # cache_etags: also store responses that only carry an ETag.
        # controller_class: alternate CacheController implementation.
        # serializer: alternate serializer passed to the controller.
        # heuristic: optional expiry heuristic applied before caching.
        super(CacheControlAdapter, self).__init__(*args, **kw)
        self.cache = cache or DictCache()
        self.heuristic = heuristic

        controller_factory = controller_class or CacheController
        self.controller = controller_factory(
            self.cache,
            cache_etags=cache_etags,
            serializer=serializer,
        )

    def send(self, request, **kw):
        """
        Send a request. Use the request information to see if it
        exists in the cache and cache the response if we need to and can.
        """
        if request.method == 'GET':
            cached_response = self.controller.cached_request(request)
            if cached_response:
                # Serve straight from the cache without touching the
                # network at all.
                return self.build_response(request, cached_response,
                                           from_cache=True)

            # check for etags and add headers if appropriate
            request.headers.update(
                self.controller.conditional_headers(request)
            )

        resp = super(CacheControlAdapter, self).send(request, **kw)

        return resp

    def build_response(self, request, response, from_cache=False):
        """
        Build a response by making a request or using the cache.

        This will end up calling send and returning a potentially
        cached response
        """
        if not from_cache and request.method == 'GET':

            # apply any expiration heuristics
            if response.status == 304:
                # We must have sent an ETag request. This could mean
                # that we've been expired already or that we simply
                # have an etag. In either case, we want to try and
                # update the cache if that is the case.
                cached_response = self.controller.update_cached_response(
                    request, response
                )

                if cached_response is not response:
                    from_cache = True

                # We are done with the server response, read a
                # possible response body (compliant servers will
                # not return one, but we cannot be 100% sure) and
                # release the connection back to the pool.
                response.read(decode_content=False)
                response.release_conn()

                response = cached_response

            # We always cache the 301 responses
            elif response.status == 301:
                self.controller.cache_response(request, response)
            else:
                # Check for any heuristics that might update headers
                # before trying to cache.
                if self.heuristic:
                    response = self.heuristic.apply(response)

                # Wrap the response file with a wrapper that will cache the
                #   response when the stream has been consumed.
                response._fp = CallbackFileWrapper(
                    response._fp,
                    functools.partial(
                        self.controller.cache_response,
                        request,
                        response,
                    )
                )

        resp = super(CacheControlAdapter, self).build_response(
            request, response
        )

        # See if we should invalidate the cache.
        if request.method in self.invalidating_methods and resp.ok:
            cache_url = self.controller.cache_url(request.url)
            self.cache.delete(cache_url)

        # Give the request a from_cache attr to let people use it
        resp.from_cache = from_cache

        return resp

    def close(self):
        # Close the cache backend first, then the underlying HTTPAdapter
        # (connection pools).
        self.cache.close()
        super(CacheControlAdapter, self).close()
diff --git a/cachecontrol/cache.py b/cachecontrol/cache.py
new file mode 100644
index 0000000..7389a73
--- /dev/null
+++ b/cachecontrol/cache.py
@@ -0,0 +1,39 @@
+"""
+The cache object API for implementing caches. The default is a thread
+safe in-memory dictionary.
+"""
+from threading import Lock
+
+
class BaseCache(object):
    """Abstract cache interface.

    Concrete caches must implement ``get``, ``set`` and ``delete``;
    ``close`` is optional and defaults to a no-op.
    """

    def get(self, key):
        """Return the cached value for *key*, or None when absent."""
        # BUGFIX: ``raise NotImplemented()`` called the NotImplemented
        # singleton, which raised a confusing TypeError instead of
        # signalling an abstract method. NotImplementedError is the
        # exception intended for this purpose.
        raise NotImplementedError()

    def set(self, key, value):
        """Store *value* under *key*."""
        raise NotImplementedError()

    def delete(self, key):
        """Remove *key* from the cache, if present."""
        raise NotImplementedError()

    def close(self):
        """Release any resources held by the cache (no-op by default)."""
        pass
+
+
class DictCache(BaseCache):
    """Thread-safe in-memory cache backed by a plain dictionary."""

    def __init__(self, init_dict=None):
        # All mutations are guarded by this lock; plain reads on a dict
        # are already atomic in CPython.
        self.lock = Lock()
        self.data = init_dict or {}

    def get(self, key):
        """Return the stored value, or None when the key is absent."""
        return self.data.get(key, None)

    def set(self, key, value):
        """Insert or overwrite the entry for *key* under the lock."""
        with self.lock:
            self.data[key] = value

    def delete(self, key):
        """Drop *key* if present; deleting a missing key is a no-op."""
        with self.lock:
            self.data.pop(key, None)
diff --git a/cachecontrol/caches/__init__.py b/cachecontrol/caches/__init__.py
new file mode 100644
index 0000000..f9e66a1
--- /dev/null
+++ b/cachecontrol/caches/__init__.py
@@ -0,0 +1,18 @@
+from textwrap import dedent
+
+try:
+    from .file_cache import FileCache
+except ImportError:
+    notice = dedent('''
+    NOTE: In order to use the FileCache you must have
+    lockfile installed. You can install it via pip:
+      pip install lockfile
+    ''')
+    print(notice)
+
+
+try:
+    import redis
+    from .redis_cache import RedisCache
+except ImportError:
+    pass
diff --git a/cachecontrol/caches/file_cache.py b/cachecontrol/caches/file_cache.py
new file mode 100644
index 0000000..fd12c8a
--- /dev/null
+++ b/cachecontrol/caches/file_cache.py
@@ -0,0 +1,103 @@
+import hashlib
+import os
+
+from lockfile import FileLock
+
+from ..cache import BaseCache
+from ..controller import CacheController
+
+
def _secure_open_write(filename, fmode):
    """Open *filename* for binary writing without following symlinks.

    Returns a binary write-mode file object whose underlying file was
    created with permission bits *fmode*. Raises OSError if the file
    reappears between the remove and the open (a symptom of an attack;
    see the comments below for the exact ordering guarantees).
    """
    # We only want to write to this file, so open it in write only mode
    flags = os.O_WRONLY

    # os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only
    #  will open *new* files.
    # We specify this because we want to ensure that the mode we pass is the
    # mode of the file.
    flags |= os.O_CREAT | os.O_EXCL

    # Do not follow symlinks to prevent someone from making a symlink that
    # we follow and insecurely open a cache file.
    if hasattr(os, "O_NOFOLLOW"):
        flags |= os.O_NOFOLLOW

    # On Windows we'll mark this file as binary
    if hasattr(os, "O_BINARY"):
        flags |= os.O_BINARY

    # Before we open our file, we want to delete any existing file that is
    # there
    try:
        os.remove(filename)
    except (IOError, OSError):
        # The file must not exist already, so we can just skip ahead to opening
        pass

    # Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
    # race condition happens between the os.remove and this line, that an
    # error will be raised. Because we utilize a lockfile this should only
    # happen if someone is attempting to attack us.
    fd = os.open(filename, flags, fmode)
    try:
        return os.fdopen(fd, "wb")
    except:
        # An error occurred wrapping our FD in a file object
        os.close(fd)
        raise
+
+
class FileCache(BaseCache):
    """Cache that stores each entry in its own file on disk.

    Entries are sharded into subdirectories derived from a SHA-224
    hash of the key, keeping individual directories small.
    """

    def __init__(self, directory, forever=False, filemode=0o0600,
                 dirmode=0o0700):
        # forever=True disables delete(), so entries are never removed.
        self.directory = directory
        self.forever = forever
        self.filemode = filemode
        self.dirmode = dirmode

    @staticmethod
    def encode(x):
        """Hash a cache key into a fixed-width hex digest."""
        return hashlib.sha224(x.encode()).hexdigest()

    def _fn(self, name):
        # NOTE: This method should not change as some may depend on it.
        #       See: https://github.com/ionrock/cachecontrol/issues/63
        hashed = self.encode(name)
        parts = list(hashed[:5]) + [hashed]
        return os.path.join(self.directory, *parts)

    def get(self, key):
        """Return the cached bytes for *key*, or None when absent."""
        name = self._fn(key)
        if not os.path.exists(name):
            return None

        with open(name, 'rb') as fh:
            return fh.read()

    def set(self, key, value):
        """Write *value* for *key*, guarded by a lock file."""
        name = self._fn(key)

        # Make sure the directory exists
        try:
            os.makedirs(os.path.dirname(name), self.dirmode)
        except (IOError, OSError):
            pass

        with FileLock(name) as lock:
            # Write our actual file
            with _secure_open_write(lock.path, self.filemode) as fh:
                fh.write(value)

    def delete(self, key):
        """Remove the entry for *key*; a no-op when ``forever`` is set.

        BUGFIX: the previous implementation raised OSError when the
        entry did not exist on disk; delete() is now idempotent, which
        matches DictCache.delete (silently ignores missing keys).
        """
        if self.forever:
            return
        try:
            os.remove(self._fn(key))
        except (IOError, OSError):
            # Entry was never cached or already removed -- nothing to do.
            pass
+
+
def url_to_file_path(url, filecache):
    """Return the on-disk path *filecache* would use for *url*.

    The path is derived from the normalized cache key; the file itself
    is neither created nor checked for existence.
    """
    return filecache._fn(CacheController.cache_url(url))
diff --git a/cachecontrol/caches/redis_cache.py b/cachecontrol/caches/redis_cache.py
new file mode 100644
index 0000000..9f5d55f
--- /dev/null
+++ b/cachecontrol/caches/redis_cache.py
@@ -0,0 +1,41 @@
+from __future__ import division
+
+from datetime import datetime
+
+
def total_seconds(td):
    """Return *td* expressed in seconds (Python 2.6 compatibility).

    timedelta.total_seconds() only exists from Python 2.7 on; when it
    is missing, compute the value from days/seconds/microseconds.
    """
    native = getattr(td, 'total_seconds', None)
    if native is not None:
        return native()

    micros = td.microseconds
    whole_seconds = td.seconds + td.days * 24 * 3600
    return (micros + whole_seconds * 10**6) / 10**6
+
+
class RedisCache(object):
    """Cache backend that stores entries in a redis connection."""

    def __init__(self, conn):
        # conn: a redis client exposing get/set/setex/delete/keys.
        self.conn = conn

    def get(self, key):
        """Return the stored value for *key* (None when absent)."""
        return self.conn.get(key)

    def set(self, key, value, expires=None):
        """Store *value*; *expires* is an absolute datetime deadline."""
        if not expires:
            self.conn.set(key, value)
            return
        # Redis wants a relative TTL, so convert the absolute deadline.
        ttl = expires - datetime.now()
        self.conn.setex(key, total_seconds(ttl), value)

    def delete(self, key):
        """Remove *key* from redis."""
        self.conn.delete(key)

    def clear(self):
        """Helper for clearing all the keys in a database. Use with
        caution!"""
        for key in self.conn.keys():
            self.conn.delete(key)

    def close(self):
        """Disconnect the underlying redis connection."""
        self.conn.disconnect()
diff --git a/cachecontrol/compat.py b/cachecontrol/compat.py
new file mode 100644
index 0000000..489eb86
--- /dev/null
+++ b/cachecontrol/compat.py
@@ -0,0 +1,23 @@
+try:
+    from urllib.parse import urljoin
+except ImportError:
+    from urlparse import urljoin
+
+
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+
+# Handle the case where the requests module has been patched to not have
+# urllib3 bundled as part of its source.
+try:
+    from requests.packages.urllib3.response import HTTPResponse
+except ImportError:
+    from urllib3.response import HTTPResponse
+
+try:
+    from requests.packages.urllib3.util import is_fp_closed
+except ImportError:
+    from urllib3.util import is_fp_closed
diff --git a/cachecontrol/controller.py b/cachecontrol/controller.py
new file mode 100644
index 0000000..f038074
--- /dev/null
+++ b/cachecontrol/controller.py
@@ -0,0 +1,299 @@
+"""
+The httplib2 algorithms ported for use with requests.
+"""
+import re
+import calendar
+import time
+from email.utils import parsedate_tz
+
+from requests.structures import CaseInsensitiveDict
+
+from .cache import DictCache
+from .serialize import Serializer
+
+
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

        (scheme, authority, path, query, fragment) = parse_uri(uri)
    """
    # The regex captures nine groups: the odd ones wrap the delimiter
    # (e.g. "http:"), the even ones hold the bare component values.
    (_, scheme, _, authority,
     path, _, query, _, fragment) = URI.match(uri).groups()
    return (scheme, authority, path, query, fragment)
+
+
+class CacheController(object):
+    """An interface to see if request should cached or not.
+    """
+    def __init__(self, cache=None, cache_etags=True, serializer=None):
+        self.cache = cache or DictCache()
+        self.cache_etags = cache_etags
+        self.serializer = serializer or Serializer()
+
+    @classmethod
+    def _urlnorm(cls, uri):
+        """Normalize the URL to create a safe key for the cache"""
+        (scheme, authority, path, query, fragment) = parse_uri(uri)
+        if not scheme or not authority:
+            raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
+
+        scheme = scheme.lower()
+        authority = authority.lower()
+
+        if not path:
+            path = "/"
+
+        # Could do syntax based normalization of the URI before
+        # computing the digest. See Section 6.2.2 of Std 66.
+        request_uri = query and "?".join([path, query]) or path
+        defrag_uri = scheme + "://" + authority + request_uri
+
+        return defrag_uri
+
+    @classmethod
+    def cache_url(cls, uri):
+        return cls._urlnorm(uri)
+
+    def parse_cache_control(self, headers):
+        """
+        Parse the cache control headers returning a dictionary with values
+        for the different directives.
+        """
+        retval = {}
+
+        cc_header = 'cache-control'
+        if 'Cache-Control' in headers:
+            cc_header = 'Cache-Control'
+
+        if cc_header in headers:
+            parts = headers[cc_header].split(',')
+            parts_with_args = [
+                tuple([x.strip().lower() for x in part.split("=", 1)])
+                for part in parts if -1 != part.find("=")
+            ]
+            parts_wo_args = [
+                (name.strip().lower(), 1)
+                for name in parts if -1 == name.find("=")
+            ]
+            retval = dict(parts_with_args + parts_wo_args)
+        return retval
+
+    def cached_request(self, request):
+        """
+        Return a cached response if it exists in the cache, otherwise
+        return False.
+        """
+        cache_url = self.cache_url(request.url)
+        cc = self.parse_cache_control(request.headers)
+
+        # non-caching states
+        no_cache = True if 'no-cache' in cc else False
+        if 'max-age' in cc and cc['max-age'] == 0:
+            no_cache = True
+
+        # Bail out if no-cache was set
+        if no_cache:
+            return False
+
+        # It is in the cache, so lets see if it is going to be
+        # fresh enough
+        resp = self.serializer.loads(request, self.cache.get(cache_url))
+
+        # Check to see if we have a cached object
+        if not resp:
+            return False
+
+        # If we have a cached 301, return it immediately. We don't
+        # need to test our response for other headers b/c it is
+        # intrinsically "cacheable" as it is Permanent.
+        # See:
+        #   https://tools.ietf.org/html/rfc7231#section-6.4.2
+        #
+        # Client can try to refresh the value by repeating the request
+        # with cache busting headers as usual (ie no-cache).
+        if resp.status == 301:
+            return resp
+
+        headers = CaseInsensitiveDict(resp.headers)
+        if not headers or 'date' not in headers:
+            # With date or etag, the cached response can never be used
+            # and should be deleted.
+            if 'etag' not in headers:
+                self.cache.delete(cache_url)
+            return False
+
+        now = time.time()
+        date = calendar.timegm(
+            parsedate_tz(headers['date'])
+        )
+        current_age = max(0, now - date)
+
+        # TODO: There is an assumption that the result will be a
+        #       urllib3 response object. This may not be best since we
+        #       could probably avoid instantiating or constructing the
+        #       response until we know we need it.
+        resp_cc = self.parse_cache_control(headers)
+
+        # determine freshness
+        freshness_lifetime = 0
+
+        # Check the max-age pragma in the cache control header
+        if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
+            freshness_lifetime = int(resp_cc['max-age'])
+
+        # If there isn't a max-age, check for an expires header
+        elif 'expires' in headers:
+            expires = parsedate_tz(headers['expires'])
+            if expires is not None:
+                expire_time = calendar.timegm(expires) - date
+                freshness_lifetime = max(0, expire_time)
+
+        # determine if we are setting freshness limit in the req
+        if 'max-age' in cc:
+            try:
+                freshness_lifetime = int(cc['max-age'])
+            except ValueError:
+                freshness_lifetime = 0
+
+        if 'min-fresh' in cc:
+            try:
+                min_fresh = int(cc['min-fresh'])
+            except ValueError:
+                min_fresh = 0
+            # adjust our current age by our min fresh
+            current_age += min_fresh
+
+        # see how fresh we actually are
+        fresh = (freshness_lifetime > current_age)
+
+        if fresh:
+            return resp
+
+        # we're not fresh. If we don't have an Etag, clear it out
+        if 'etag' not in headers:
+            self.cache.delete(cache_url)
+
+        # return the original handler
+        return False
+
+    def conditional_headers(self, request):
+        cache_url = self.cache_url(request.url)
+        resp = self.serializer.loads(request, self.cache.get(cache_url))
+        new_headers = {}
+
+        if resp:
+            headers = CaseInsensitiveDict(resp.headers)
+
+            if 'etag' in headers:
+                new_headers['If-None-Match'] = headers['ETag']
+
+            if 'last-modified' in headers:
+                new_headers['If-Modified-Since'] = headers['Last-Modified']
+
+        return new_headers
+
+    def cache_response(self, request, response, body=None):
+        """
+        Algorithm for caching requests.
+
+        This assumes a requests Response object.
+        """
+        # From httplib2: Don't cache 206's since we aren't going to
+        #                handle byte range requests
+        if response.status not in [200, 203, 300, 301]:
+            return
+
+        response_headers = CaseInsensitiveDict(response.headers)
+
+        cc_req = self.parse_cache_control(request.headers)
+        cc = self.parse_cache_control(response_headers)
+
+        cache_url = self.cache_url(request.url)
+
+        # Delete it from the cache if we happen to have it stored there
+        no_store = cc.get('no-store') or cc_req.get('no-store')
+        if no_store and self.cache.get(cache_url):
+            self.cache.delete(cache_url)
+
+        # If we've been given an etag, then keep the response
+        if self.cache_etags and 'etag' in response_headers:
+            self.cache.set(
+                cache_url,
+                self.serializer.dumps(request, response, body=body),
+            )
+
+        # Add to the cache any 301s. We do this before looking that
+        # the Date headers.
+        elif response.status == 301:
+            self.cache.set(
+                cache_url,
+                self.serializer.dumps(request, response)
+            )
+
+        # Add to the cache if the response headers demand it. If there
+        # is no date header then we can't do anything about expiring
+        # the cache.
+        elif 'date' in response_headers:
+            # cache when there is a max-age > 0
+            if cc and cc.get('max-age'):
+                if int(cc['max-age']) > 0:
+                    self.cache.set(
+                        cache_url,
+                        self.serializer.dumps(request, response, body=body),
+                    )
+
+            # If the request can expire, it means we should cache it
+            # in the meantime.
+            elif 'expires' in response_headers:
+                if response_headers['expires']:
+                    self.cache.set(
+                        cache_url,
+                        self.serializer.dumps(request, response, body=body),
+                    )
+
+    def update_cached_response(self, request, response):
+        """On a 304 we will get a new set of headers that we want to
+        update our cached value with, assuming we have one.
+
+        This should only ever be called when we've sent an ETag and
+        gotten a 304 as the response.
+        """
+        cache_url = self.cache_url(request.url)
+
+        cached_response = self.serializer.loads(
+            request,
+            self.cache.get(cache_url)
+        )
+
+        if not cached_response:
+            # we didn't have a cached response
+            return response
+
+        # Lets update our headers with the headers from the new request:
+        # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
+        #
+        # The server isn't supposed to send headers that would make
+        # the cached body invalid. But... just in case, we'll be sure
+        # to strip out ones we know that might be problmatic due to
+        # typical assumptions.
+        excluded_headers = [
+            "content-length",
+        ]
+
+        cached_response.headers.update(
+            dict((k, v) for k, v in response.headers.items()
+                 if k.lower() not in excluded_headers)
+        )
+
+        # we want a 200 b/c we have content via the cache
+        cached_response.status = 200
+
+        # update our cache
+        self.cache.set(
+            cache_url,
+            self.serializer.dumps(request, cached_response),
+        )
+
+        return cached_response
diff --git a/cachecontrol/filewrapper.py b/cachecontrol/filewrapper.py
new file mode 100644
index 0000000..4b91bce
--- /dev/null
+++ b/cachecontrol/filewrapper.py
@@ -0,0 +1,63 @@
+from io import BytesIO
+
+
+class CallbackFileWrapper(object):
+    """
+    Small wrapper around a fp object which will tee everything read into a
+    buffer, and when that file is closed it will execute a callback with the
+    contents of that buffer.
+
+    All attributes are proxied to the underlying file object.
+
+    This class uses members with a double underscore (__) leading prefix so as
+    not to accidentally shadow an attribute.
+    """
+
+    def __init__(self, fp, callback):
+        self.__buf = BytesIO()
+        self.__fp = fp
+        self.__callback = callback
+
+    def __getattr__(self, name):
+        # The vagaries of garbage collection mean that self.__fp is
+        # not always set.  By using __getattribute__ and the private
+        # name[0] allows looking up the attribute value and raising an
+        # AttributeError when it doesn't exist. This stops things from
+        # infinitely recursing calls to getattr in the case where
+        # self.__fp hasn't been set.
... 453 lines suppressed ...

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-cachecontrol.git



More information about the Python-modules-commits mailing list