[Checkins] SVN: zope.browserresource/branches/mgedmin-etag-support/ Support the HTTP ETag header for file resources.

Marius Gedminas marius at pov.lt
Mon Aug 9 14:10:53 EDT 2010


Log message for revision 115596:
  Support the HTTP ETag header for file resources.
  
  

Changed:
  U   zope.browserresource/branches/mgedmin-etag-support/CHANGES.txt
  U   zope.browserresource/branches/mgedmin-etag-support/setup.py
  U   zope.browserresource/branches/mgedmin-etag-support/src/zope/browserresource/file.py
  U   zope.browserresource/branches/mgedmin-etag-support/src/zope/browserresource/tests/test_file.py

-=-
Modified: zope.browserresource/branches/mgedmin-etag-support/CHANGES.txt
===================================================================
--- zope.browserresource/branches/mgedmin-etag-support/CHANGES.txt	2010-08-09 18:09:58 UTC (rev 115595)
+++ zope.browserresource/branches/mgedmin-etag-support/CHANGES.txt	2010-08-09 18:10:53 UTC (rev 115596)
@@ -2,9 +2,10 @@
 CHANGES
 =======
 
-3.10.4 (unreleased)
+3.11.0 (unreleased)
 ===================
 
+- Support the HTTP ETag header for file resources.
 
 3.10.3 (2010-04-30)
 ===================

Modified: zope.browserresource/branches/mgedmin-etag-support/setup.py
===================================================================
--- zope.browserresource/branches/mgedmin-etag-support/setup.py	2010-08-09 18:09:58 UTC (rev 115595)
+++ zope.browserresource/branches/mgedmin-etag-support/setup.py	2010-08-09 18:10:53 UTC (rev 115596)
@@ -19,7 +19,7 @@
                     open('CHANGES.txt').read())
 
 setup(name='zope.browserresource',
-      version = '3.10.4dev',
+      version = '3.11.0dev',
       url='http://pypi.python.org/pypi/zope.browserresource/',
       author='Zope Foundation and Contributors',
       author_email='zope-dev at zope.org',

Modified: zope.browserresource/branches/mgedmin-etag-support/src/zope/browserresource/file.py
===================================================================
--- zope.browserresource/branches/mgedmin-etag-support/src/zope/browserresource/file.py	2010-08-09 18:09:58 UTC (rev 115595)
+++ zope.browserresource/branches/mgedmin-etag-support/src/zope/browserresource/file.py	2010-08-09 18:10:53 UTC (rev 115596)
@@ -16,6 +16,7 @@
 
 import os
 import time
+import re
 try:
     from email.utils import formatdate, parsedate_tz, mktime_tz
 except ImportError: # python 2.4
@@ -32,12 +33,88 @@
 from zope.browserresource.interfaces import IResourceFactoryFactory
 
 
+ETAG_RX = re.compile(r'[*]|(?:W/)?"(?:[^"\\]|[\\].)*"')
+
+
+def parse_etags(value):
+    r"""Parse a list of entity tags.
+
+    HTTP/1.1 specifies the following syntax for If-Match/If-None-Match
+    headers::
+
+        If-Match = "If-Match" ":" ( "*" | 1#entity-tag )
+        If-None-Match = "If-None-Match" ":" ( "*" | 1#entity-tag )
+
+        entity-tag = [ weak ] opaque-tag
+
+        weak       = "W/"
+        opaque-tag = quoted-string
+
+        quoted-string  = ( <"> *(qdtext) <"> )
+        qdtext         = <any TEXT except <">>
+
+        The backslash character ("\") may be used as a single-character
+        quoting mechanism only within quoted-string and comment constructs.
+
+    Examples:
+
+        >>> parse_etags('*')
+        ['*']
+
+        >>> parse_etags(r' "qwerty", ,"foo",W/"bar" , "baz","\""')
+        ['"qwerty"', '"foo"', 'W/"bar"', '"baz"', '"\\""']
+
+    Ill-formed headers are ignored
+
+        >>> parse_etags("not an etag at all")
+        []
+
+    """
+    return ETAG_RX.findall(value)
+
+
+def etag_matches(etag, tags):
+    """Check if the entity tag matches any of the given tags.
+
+        >>> etag_matches('"xyzzy"', ['"abc"', '"xyzzy"', 'W/"woof"'])
+        True
+
+        >>> etag_matches('"woof"', ['"abc"', 'W/"woof"'])
+        False
+
+        >>> etag_matches('"xyzzy"', ['*'])
+        True
+
+    Note that you pass quoted etags in both arguments!
+    """
+    for tag in tags:
+        if tag == etag or tag == '*':
+            return True
+    return False
+
+
+def quote_etag(etag):
+    r"""Quote an etag value
+
+        >>> quote_etag("foo")
+        '"foo"'
+
+    Special characters are escaped
+
+        >>> quote_etag('"')
+        '"\\""'
+        >>> quote_etag('\\')
+        '"\\\\"'
+
+    """
+    return '"%s"' % etag.replace('\\', '\\\\').replace('"', '\\"')
+
+
 class File(object):
-    
+
     def __init__(self, path, name):
         self.path = path
         self.__name__ = name
-
         f = open(path, 'rb')
         data = f.read()
         f.close()
@@ -45,6 +122,7 @@
 
         self.lmt = float(os.path.getmtime(path)) or time.time()
         self.lmh = formatdate(self.lmt, usegmt=True)
+        self.etag = '%s-%s' % (self.lmt, len(data))
 
 
 class FileResource(BrowserView, Resource):
@@ -56,7 +134,7 @@
     def publishTraverse(self, request, name):
         '''File resources can't be traversed further, so raise NotFound if
         someone tries to traverse it.
-        
+
           >>> factory = FileResourceFactory(testFilePath, nullChecker, 'test.txt')
           >>> request = TestRequest()
           >>> resource = factory(request)
@@ -87,23 +165,23 @@
           True
           >>> next == ()
           True
-        
+
         '''
         return getattr(self, request.method), ()
 
     def chooseContext(self):
         '''Choose the appropriate context.
-        
+
         This method can be overriden in subclasses, that need to choose
         appropriate file, based on current request or other condition,
         like, for example, i18n files.
-        
+
         '''
         return self.context
 
     def GET(self):
         '''Return a file data for downloading with GET requests
-        
+
           >>> factory = FileResourceFactory(testFilePath, nullChecker, 'test.txt')
           >>> request = TestRequest()
           >>> resource = factory(request)
@@ -111,36 +189,7 @@
           True
           >>> request.response.getHeader('Content-Type') == 'text/plain'
           True
-        
-        Let's test If-Modified-Since header support.
 
-          >>> timestamp = time.time()
-        
-          >>> file = factory._FileResourceFactory__file # get mangled file
-          >>> file.lmt = timestamp
-          >>> file.lmh = formatdate(timestamp, usegmt=True)
-
-          >>> before = timestamp - 1000
-          >>> request = TestRequest(HTTP_IF_MODIFIED_SINCE=formatdate(before, usegmt=True))
-          >>> resource = factory(request)
-          >>> bool(resource.GET())
-          True
-
-          >>> after = timestamp + 1000
-          >>> request = TestRequest(HTTP_IF_MODIFIED_SINCE=formatdate(after, usegmt=True))
-          >>> resource = factory(request)
-          >>> bool(resource.GET())
-          False
-          >>> request.response.getStatus()
-          304
-
-        It won't fail on bad If-Modified-Since headers.
-
-          >>> request = TestRequest(HTTP_IF_MODIFIED_SINCE='bad header')
-          >>> resource = factory(request)
-          >>> bool(resource.GET())
-          True
-
         '''
 
         file = self.chooseContext()
@@ -149,11 +198,15 @@
 
         setCacheControl(response, self.cacheTimeout)
 
+        can_return_304 = False
+        all_cache_checks_passed = True
+
         # HTTP If-Modified-Since header handling. This is duplicated
         # from OFS.Image.Image - it really should be consolidated
         # somewhere...
         header = request.getHeader('If-Modified-Since', None)
         if header is not None:
+            can_return_304 = True
             header = header.split(';')[0]
             # Some proxies seem to send invalid date strings for this
             # header. If the date string is not valid, we ignore it
@@ -165,15 +218,35 @@
                 mod_since = long(mktime_tz(parsedate_tz(header)))
             except:
                 mod_since = None
-            if mod_since is not None:
-                if getattr(file, 'lmt', None):
-                    last_mod = long(file.lmt)
-                else:
-                    last_mod = 0L
-                if last_mod > 0 and last_mod <= mod_since:
-                    response.setStatus(304)
-                    return ''
+            if getattr(file, 'lmt', None):
+                last_mod = long(file.lmt)
+            else:
+                last_mod = 0L
+            if mod_since is None or last_mod <= 0 or last_mod > mod_since:
+                all_cache_checks_passed = False
 
+        # HTTP If-None-Match header handling
+        header = request.getHeader('If-None-Match', None)
+        if header is not None:
+            can_return_304 = True
+            etag = getattr(file, 'etag', None)
+            tags = parse_etags(header)
+            if not etag or not etag_matches(quote_etag(etag), tags):
+                all_cache_checks_passed = False
+
+        # 304 responses MUST contain ETag, if one would've been sent with
+        # a 200 response
+        if file.etag:
+            response.setHeader('ETag', quote_etag(file.etag))
+
+        if can_return_304 and all_cache_checks_passed:
+            response.setStatus(304)
+            return ''
+
+        # 304 responses SHOULD NOT or MUST NOT include other entity headers,
+        # depending on whether the conditional GET used a strong or a weak
+        # validator.  We only use strong validators, which makes it SHOULD
+        # NOT.
         response.setHeader('Content-Type', file.content_type)
         response.setHeader('Last-Modified', file.lmh)
 
@@ -185,7 +258,7 @@
 
     def HEAD(self):
         '''Return proper headers and no content for HEAD requests
-        
+
           >>> factory = FileResourceFactory(testFilePath, nullChecker, 'test.txt')
           >>> request = TestRequest()
           >>> resource = factory(request)
@@ -199,6 +272,8 @@
         response = self.request.response
         response.setHeader('Content-Type', file.content_type)
         response.setHeader('Last-Modified', file.lmh)
+        if file.etag:
+            response.setHeader('ETag', file.etag)
         setCacheControl(response, self.cacheTimeout)
         return ''
 

Modified: zope.browserresource/branches/mgedmin-etag-support/src/zope/browserresource/tests/test_file.py
===================================================================
--- zope.browserresource/branches/mgedmin-etag-support/src/zope/browserresource/tests/test_file.py	2010-08-09 18:09:58 UTC (rev 115595)
+++ zope.browserresource/branches/mgedmin-etag-support/src/zope/browserresource/tests/test_file.py	2010-08-09 18:10:53 UTC (rev 115596)
@@ -17,12 +17,19 @@
 import doctest
 import os
 import unittest
+import time
+try:
+    from email.utils import formatdate, parsedate_tz, mktime_tz
+except ImportError: # python 2.4
+    from email.Utils import formatdate, parsedate_tz, mktime_tz
+
 from zope.testing import cleanup
-
 from zope.publisher.browser import TestRequest
 from zope.security.checker import NamesChecker
 
+from zope.browserresource.file import FileResourceFactory
 
+
 def setUp(test):
     cleanup.setUp()
     data_dir = os.path.join(os.path.dirname(__file__), 'testfiles')
@@ -35,10 +42,209 @@
 def tearDown(test):
     cleanup.tearDown()
 
+
+def doctest_FileResource_GET_sets_cache_headers():
+    """Test caching headers set by FileResource.GET
+
+        >>> factory = FileResourceFactory(testFilePath, nullChecker, 'test.txt')
+
+        >>> timestamp = time.time()
+
+        >>> file = factory._FileResourceFactory__file # get mangled file
+        >>> file.lmt = timestamp
+        >>> file.lmh = formatdate(timestamp, usegmt=True)
+        >>> file.etag = 'myetag'
+
+        >>> request = TestRequest()
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        True
+
+        >>> request.response.getHeader('Last-Modified') == file.lmh
+        True
+        >>> request.response.getHeader('ETag')
+        '"myetag"'
+        >>> request.response.getHeader('Cache-Control')
+        'public,max-age=86400'
+        >>> bool(request.response.getHeader('Expires'))
+        True
+
+    """
+
+
+def doctest_FileResource_GET_if_modified_since():
+    """Test If-Modified-Since header support
+
+        >>> factory = FileResourceFactory(testFilePath, nullChecker, 'test.txt')
+
+        >>> timestamp = time.time()
+
+        >>> file = factory._FileResourceFactory__file # get mangled file
+        >>> file.lmt = timestamp
+        >>> file.lmh = formatdate(timestamp, usegmt=True)
+        >>> file.etag = 'myetag'
+
+        >>> before = timestamp - 1000
+        >>> request = TestRequest(HTTP_IF_MODIFIED_SINCE=formatdate(before, usegmt=True))
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        True
+
+        >>> after = timestamp + 1000
+        >>> request = TestRequest(HTTP_IF_MODIFIED_SINCE=formatdate(after, usegmt=True))
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        False
+        >>> request.response.getStatus()
+        304
+
+    Cache control headers and ETag are set on 304 responses
+
+        >>> request.response.getHeader('ETag')
+        '"myetag"'
+        >>> request.response.getHeader('Cache-Control')
+        'public,max-age=86400'
+        >>> bool(request.response.getHeader('Expires'))
+        True
+
+    Other entity headers are not
+
+        >>> request.response.getHeader('Last-Modified')
+        >>> request.response.getHeader('Content-Type')
+
+    It won't fail on bad If-Modified-Since headers.
+
+        >>> request = TestRequest(HTTP_IF_MODIFIED_SINCE='bad header')
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        True
+
+    It also won't fail if we don't have a last modification time for the
+    resource.
+
+        >>> file.lmt = None
+        >>> request = TestRequest(HTTP_IF_MODIFIED_SINCE=formatdate(after, usegmt=True))
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        True
+
+    """
+
+
+def doctest_FileResource_GET_if_none_match():
+    """Test If-None-Match header support
+
+        >>> factory = FileResourceFactory(testFilePath, nullChecker, 'test.txt')
+
+        >>> timestamp = time.time()
+
+        >>> file = factory._FileResourceFactory__file # get mangled file
+        >>> file.lmt = timestamp
+        >>> file.lmh = formatdate(timestamp, usegmt=True)
+        >>> file.etag = 'myetag'
+
+        >>> request = TestRequest(HTTP_IF_NONE_MATCH='"othertag"')
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        True
+
+        >>> request = TestRequest(HTTP_IF_NONE_MATCH='"myetag"')
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        False
+        >>> request.response.getStatus()
+        304
+
+    Cache control headers and ETag are set on 304 responses
+
+        >>> request.response.getHeader('ETag')
+        '"myetag"'
+        >>> request.response.getHeader('Cache-Control')
+        'public,max-age=86400'
+        >>> bool(request.response.getHeader('Expires'))
+        True
+
+    Other entity headers are not
+
+        >>> request.response.getHeader('Last-Modified')
+        >>> request.response.getHeader('Content-Type')
+
+    It won't fail on bad If-None-Match headers.
+
+        >>> request = TestRequest(HTTP_IF_NONE_MATCH='bad header')
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        True
+
+    It also won't fail if we don't have an etag for the resource.
+
+        >>> file.etag = None
+        >>> request = TestRequest(HTTP_IF_NONE_MATCH='"someetag"')
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        True
+
+    """
+
+
+def doctest_FileResource_GET_if_none_match_and_if_modified_since():
+    """Test combined If-None-Match and If-Modified-Since header support
+
+        >>> factory = FileResourceFactory(testFilePath, nullChecker, 'test.txt')
+
+        >>> timestamp = time.time()
+
+        >>> file = factory._FileResourceFactory__file # get mangled file
+        >>> file.lmt = timestamp
+        >>> file.lmh = formatdate(timestamp, usegmt=True)
+        >>> file.etag = 'myetag'
+
+    Both headers match
+
+        >>> after = timestamp + 1000
+        >>> request = TestRequest(HTTP_IF_MODIFIED_SINCE=formatdate(after, usegmt=True),
+        ...                       HTTP_IF_NONE_MATCH='"myetag"')
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        False
+        >>> request.response.getStatus()
+        304
+
+    Last-modified matches, but ETag doesn't
+
+        >>> request = TestRequest(HTTP_IF_MODIFIED_SINCE=formatdate(after, usegmt=True),
+        ...                       HTTP_IF_NONE_MATCH='"otheretag"')
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        True
+
+    ETag matches but last-modified doesn't
+
+        >>> before = timestamp - 1000
+        >>> request = TestRequest(HTTP_IF_MODIFIED_SINCE=formatdate(before, usegmt=True),
+        ...                       HTTP_IF_NONE_MATCH='"myetag"')
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        True
+
+    Neither header matches
+
+        >>> before = timestamp - 1000
+        >>> request = TestRequest(HTTP_IF_MODIFIED_SINCE=formatdate(before, usegmt=True),
+        ...                       HTTP_IF_NONE_MATCH='"otheretag"')
+        >>> resource = factory(request)
+        >>> bool(resource.GET())
+        True
+
+    """
+
 def test_suite():
     return unittest.TestSuite((
         doctest.DocTestSuite(
             'zope.browserresource.file',
             setUp=setUp, tearDown=tearDown,
             optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE),
+        doctest.DocTestSuite(
+            setUp=setUp, tearDown=tearDown,
+            optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE),
         ))



More information about the checkins mailing list