[Checkins] SVN: zc.buildout/trunk/ merged the tlotze-download-api branch

Thomas Lotze tl at gocept.com
Tue Jul 21 07:37:28 EDT 2009


Log message for revision 102051:
  merged the tlotze-download-api branch

Changed:
  U   zc.buildout/trunk/CHANGES.txt
  U   zc.buildout/trunk/setup.py
  U   zc.buildout/trunk/src/zc/buildout/buildout.py
  A   zc.buildout/trunk/src/zc/buildout/download.py
  A   zc.buildout/trunk/src/zc/buildout/download.txt
  A   zc.buildout/trunk/src/zc/buildout/extends-cache.txt
  U   zc.buildout/trunk/src/zc/buildout/testing.py
  U   zc.buildout/trunk/src/zc/buildout/tests.py

-=-
Modified: zc.buildout/trunk/CHANGES.txt
===================================================================
--- zc.buildout/trunk/CHANGES.txt	2009-07-20 21:28:23 UTC (rev 102050)
+++ zc.buildout/trunk/CHANGES.txt	2009-07-21 11:37:27 UTC (rev 102051)
@@ -1,9 +1,18 @@
 Change History
 **************
 
+1.4.0 (unreleased)
+==================
+
 - Added annotate command for annotated sections. Displays sections key-value pairs
   along with the value origin.
 
+- Added a download API that handles the download cache, offline mode, etc., and
+  is meant to be reused by recipes.
+
+- Used the download API to allow caching of base configurations (specified by
+  the buildout section's 'extends' option).
+
 1.3.0 (2009-06-22)
 ==================
 

Modified: zc.buildout/trunk/setup.py
===================================================================
--- zc.buildout/trunk/setup.py	2009-07-20 21:28:23 UTC (rev 102050)
+++ zc.buildout/trunk/setup.py	2009-07-21 11:37:27 UTC (rev 102051)
@@ -1,6 +1,6 @@
 ##############################################################################
 #
-# Copyright (c) 2006 Zope Corporation and Contributors.
+# Copyright (c) 2006-2009 Zope Corporation and Contributors.
 # All Rights Reserved.
 #
 # This software is subject to the provisions of the Zope Public License,
@@ -12,7 +12,7 @@
 #
 ##############################################################################
 
-version = "1.3.1dev"
+version = "1.4.0dev"
 
 import os
 from setuptools import setup, find_packages
@@ -32,8 +32,12 @@
         + '\n' +
         read('src', 'zc', 'buildout', 'repeatable.txt')
         + '\n' +
+        read('src', 'zc', 'buildout', 'download.txt')
+        + '\n' +
         read('src', 'zc', 'buildout', 'downloadcache.txt')
         + '\n' +
+        read('src', 'zc', 'buildout', 'extends-cache.txt')
+        + '\n' +
         read('src', 'zc', 'buildout', 'setup.txt')
         + '\n' +
         read('src', 'zc', 'buildout', 'update.txt')

Modified: zc.buildout/trunk/src/zc/buildout/buildout.py
===================================================================
--- zc.buildout/trunk/src/zc/buildout/buildout.py	2009-07-20 21:28:23 UTC (rev 102050)
+++ zc.buildout/trunk/src/zc/buildout/buildout.py	2009-07-21 11:37:27 UTC (rev 102051)
@@ -1,6 +1,6 @@
 ##############################################################################
 #
-# Copyright (c) 2005 Zope Corporation and Contributors.
+# Copyright (c) 2005-2009 Zope Corporation and Contributors.
 # All Rights Reserved.
 #
 # This software is subject to the provisions of the Zope Public License,
@@ -25,7 +25,6 @@
 import cStringIO
 import sys
 import tempfile
-import urllib2
 import ConfigParser
 import UserDict
 import glob
@@ -34,6 +33,7 @@
 
 import pkg_resources
 import zc.buildout
+import zc.buildout.download
 import zc.buildout.easy_install
 
 from rmtree import rmtree
@@ -156,17 +156,22 @@
         else:
             base = None
 
+        override = dict((option, (value, 'COMMAND_LINE_VALUE'))
+                        for section, option, value in cloptions
+                        if section == 'buildout')
+
         # load user defaults, which override defaults
         if user_defaults:
             user_config = os.path.join(os.path.expanduser('~'),
                                        '.buildout', 'default.cfg')
             if os.path.exists(user_config):
                 _update(data, _open(os.path.dirname(user_config), user_config,
-                                    []))
+                                    [], data['buildout'].copy(), override))
 
         # load configuration files
         if config_file:
-            _update(data, _open(os.path.dirname(config_file), config_file, []))
+            _update(data, _open(os.path.dirname(config_file), config_file, [],
+                                data['buildout'].copy(), override))
 
         # apply command-line options
         for (section, option, value) in cloptions:
@@ -174,7 +179,6 @@
             if options is None:
                 options = data[section] = {}
             options[option] = value, "COMMAND_LINE_VALUE"
-                # The egg dire
 
         self._annotated = copy.deepcopy(data)
         self._raw = _unannotate(data)
@@ -313,6 +317,11 @@
         for name in _buildout_default_options:
             options[name]
 
+        # Do the same for extends-cache which is not among the defaults but
+        # wasn't recognized as having been used since it was used before
+        # tracking was turned on.
+        options.get('extends-cache')
+
         os.chdir(options['directory'])
 
     def _buildout_path(self, name):
@@ -1214,14 +1223,18 @@
     for option, value in items:
         _save_option(option, value, f)
 
-def _open(base, filename, seen):
+def _open(base, filename, seen, dl_options, override):
     """Open a configuration file and return the result as a dictionary,
 
     Recursively open other files based on buildout options found.
     """
-
+    _update_section(dl_options, override)
+    _dl_options = _unannotate_section(dl_options.copy())
+    download = zc.buildout.download.Download(
+        _dl_options, cache=_dl_options.get('extends-cache'), fallback=True,
+        hash_name=True)
     if _isurl(filename):
-        fp = urllib2.urlopen(filename)
+        fp = open(download(filename))
         base = filename[:filename.rfind('/')]
     elif _isurl(base):
         if os.path.isabs(filename):
@@ -1229,7 +1242,7 @@
             base = os.path.dirname(filename)
         else:
             filename = base + '/' + filename
-            fp = urllib2.urlopen(filename)
+            fp = open(download(filename))
             base = filename[:filename.rfind('/')]
     else:
         filename = os.path.join(base, filename)
@@ -1239,6 +1252,7 @@
     if filename in seen:
         raise zc.buildout.UserError("Recursive file include", seen, filename)
 
+    root_config_file = not seen
     seen.append(filename)
 
     result = {}
@@ -1256,18 +1270,23 @@
 
     result = _annotate(result, filename)
 
+    if root_config_file and 'buildout' in result:
+        dl_options = _update_section(dl_options, result['buildout'])
+
     if extends:
         extends = extends.split()
         extends.reverse()
         for fname in extends:
-            result = _update(_open(base, fname, seen), result)
+            result = _update(_open(base, fname, seen, dl_options, override),
+                             result)
 
     if extended_by:
         self._logger.warn(
             "The extendedBy option is deprecated.  Stop using it."
             )
         for fname in extended_by.split():
-            result = _update(result, _open(base, fname, seen))
+            result = _update(result,
+                             _open(base, fname, seen, dl_options, override))
 
     seen.pop()
     return result

Copied: zc.buildout/trunk/src/zc/buildout/download.py (from rev 102041, zc.buildout/branches/tlotze-download-api/src/zc/buildout/download.py)
===================================================================
--- zc.buildout/trunk/src/zc/buildout/download.py	                        (rev 0)
+++ zc.buildout/trunk/src/zc/buildout/download.py	2009-07-21 11:37:27 UTC (rev 102051)
@@ -0,0 +1,217 @@
+##############################################################################
+#
+# Copyright (c) 2009 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Buildout download infrastructure"""
+
+try:
+    from hashlib import md5
+except ImportError:
+    from md5 import new as md5
+from zc.buildout.easy_install import realpath
+import atexit
+import logging
+import os
+import os.path
+import shutil
+import tempfile
+import urllib
+import urlparse
+import zc.buildout
+
+
+class URLOpener(urllib.FancyURLopener):
+    http_error_default = urllib.URLopener.http_error_default
+
+
+class ChecksumError(zc.buildout.UserError):
+    pass
+
+
+url_opener = URLOpener()
+
+
+class Download(object):
+    """Configurable download utility.
+
+    Handles the download cache and offline mode.
+
+    Download(options=None, cache=None, namespace=None, hash_name=False)
+
+    options: mapping of buildout options (e.g. a ``buildout`` config section)
+    cache: path to the download cache (excluding namespaces)
+    namespace: namespace directory to use inside the cache
+    hash_name: whether to use a hash of the URL as cache file name
+    logger: an optional logger to receive download-related log messages
+
+    """
+
+    def __init__(self, options={}, cache=-1, namespace=None,
+                 offline=-1, fallback=False, hash_name=False, logger=None):
+        self.cache = cache
+        if cache == -1:
+            self.cache = options.get('download-cache')
+        self.namespace = namespace
+        self.offline = offline
+        if offline == -1:
+            self.offline = (options.get('offline') == 'true'
+                            or options.get('install-from-cache') == 'true')
+        self.fallback = fallback
+        self.hash_name = hash_name
+        self.logger = logger or logging.getLogger('zc.buildout')
+
+    @property
+    def cache_dir(self):
+        if self.cache is not None:
+            return os.path.join(realpath(self.cache), self.namespace or '')
+
+    def __call__(self, url, md5sum=None, path=None):
+        """Download a file according to the utility's configuration.
+
+        url: URL to download
+        md5sum: MD5 checksum to match
+        path: where to place the downloaded file
+
+        Returns the path to the downloaded file.
+
+        """
+        if self.cache:
+            local_path = self.download_cached(url, md5sum)
+        else:
+            local_path = self.download(url, md5sum, path)
+
+        return locate_at(local_path, path)
+
+    def download_cached(self, url, md5sum=None):
+        """Download a file from a URL using the cache.
+
+        This method assumes that the cache has been configured. Optionally, it
+        raises a ChecksumError if a cached copy of a file has an MD5 mismatch,
+        but will not remove the copy in that case.
+
+        """
+        cache_dir = self.cache_dir
+        if not os.path.exists(cache_dir):
+            os.makedirs(cache_dir)
+        cache_key = self.filename(url)
+        cached_path = os.path.join(cache_dir, cache_key)
+
+        self.logger.debug('Searching cache at %s' % cache_dir)
+        if os.path.isfile(cached_path):
+            if self.fallback:
+                try:
+                    self.download(url, md5sum, cached_path)
+                except ChecksumError:
+                    raise
+                except Exception:
+                    pass
+
+            if not check_md5sum(cached_path, md5sum):
+                raise ChecksumError(
+                    'MD5 checksum mismatch for cached download '
+                    'from %r at %r' % (url, cached_path))
+            self.logger.debug('Using cache file %s' % cached_path)
+        else:
+            self.logger.debug('Cache miss; will cache %s as %s' %
+                              (url, cached_path))
+            self.download(url, md5sum, cached_path)
+
+        return cached_path
+
+    def download(self, url, md5sum=None, path=None):
+        """Download a file from a URL to a given or temporary path.
+
+        An online resource is always downloaded to a temporary file and moved
+        to the specified path only after the download is complete and the
+        checksum (if given) matches. If path is None, the temporary file is
+        returned and scheduled for deletion at process exit.
+
+        """
+        parsed_url = urlparse.urlparse(url, 'file')
+        if parsed_url.scheme == 'file':
+            self.logger.debug('Using local resource %s' % url)
+            if not check_md5sum(parsed_url.path, md5sum):
+                raise ChecksumError(
+                    'MD5 checksum mismatch for local resource at %r.' %
+                    parsed_url.path)
+            return locate_at(parsed_url.path, path)
+
+        if self.offline:
+            raise zc.buildout.UserError(
+                "Couldn't download %r in offline mode." % url)
+
+        self.logger.info('Downloading %s' % url)
+        urllib._urlopener = url_opener
+        handle, tmp_path = tempfile.mkstemp(prefix='buildout-')
+        tmp_path, headers = urllib.urlretrieve(url, tmp_path)
+        if not check_md5sum(tmp_path, md5sum):
+            os.remove(tmp_path)
+            raise ChecksumError(
+                'MD5 checksum mismatch downloading %r' % url)
+
+        if path:
+            shutil.move(tmp_path, path)
+            return path
+        else:
+            atexit.register(remove, tmp_path)
+            return tmp_path
+
+    def filename(self, url):
+        """Determine a file name from a URL according to the configuration.
+
+        """
+        if self.hash_name:
+            return md5(url).hexdigest()
+        else:
+            parsed = urlparse.urlparse(url)
+            for name in reversed(parsed.path.split('/')):
+                if name:
+                    return name
+            else:
+                return '%s:%s' % (parsed.host, parsed.port)
+
+
+def check_md5sum(path, md5sum):
+    """Tell whether the MD5 checksum of the file at path matches.
+
+    No checksum being given is considered a match.
+
+    """
+    if md5sum is None:
+        return True
+
+    f = open(path)
+    checksum = md5()
+    try:
+        chunk = f.read(2**16)
+        while chunk:
+            checksum.update(chunk)
+            chunk = f.read(2**16)
+        return checksum.hexdigest() == md5sum
+    finally:
+        f.close()
+
+
+def remove(path):
+    if os.path.exists(path):
+        os.remove(path)
+
+
+def locate_at(source, dest):
+    if dest is None or realpath(dest) == realpath(source):
+        return source
+
+    try:
+        os.link(source, dest)
+    except (AttributeError, OSError):
+        shutil.copyfile(source, dest)
+    return dest

Copied: zc.buildout/trunk/src/zc/buildout/download.txt (from rev 102041, zc.buildout/branches/tlotze-download-api/src/zc/buildout/download.txt)
===================================================================
--- zc.buildout/trunk/src/zc/buildout/download.txt	                        (rev 0)
+++ zc.buildout/trunk/src/zc/buildout/download.txt	2009-07-21 11:37:27 UTC (rev 102051)
@@ -0,0 +1,456 @@
+Using the download utility
+==========================
+
+The ``zc.buildout.download`` module provides a download utility that handles
+the details of downloading files needed for a buildout run from the internet.
+It downloads files to the local file system, using the download cache if
+desired and optionally checking the downloaded files' MD5 checksum.
+
+We set up an HTTP server that provides a file we want to download:
+
+>>> server_data = tmpdir('sample_files')
+>>> write(server_data, 'foo.txt', 'This is a foo text.')
+>>> server_url = start_server(server_data)
+
+
+Downloading without using the cache
+-----------------------------------
+
+If no download cache should be used, the download utility is instantiated
+without any arguments:
+
+>>> from zc.buildout.download import Download
+>>> download = Download()
+>>> print download.cache_dir
+None
+
+Downloading a file is achieved by calling the utility with the URL as an
+argument:
+
+>>> path = download(server_url+'foo.txt')
+>>> print path
+/.../buildout-...
+>>> cat(path)
+This is a foo text.
+
+As we aren't using the download cache and haven't specified a target path
+either, the download has ended up in a temporary file:
+
+>>> import tempfile
+>>> path.startswith(tempfile.gettempdir())
+True
+
+When trying to access a file that doesn't exist, we'll get an exception:
+
+>>> download(server_url+'not-there')
+Traceback (most recent call last):
+IOError: ('http error', 404, 'Not Found',
+          <httplib.HTTPMessage instance at 0xa0ffd2c>)
+
+We can also have the downloaded file's MD5 sum checked:
+
+>>> try: from hashlib import md5
+... except ImportError: from md5 import new as md5
+
+>>> path = download(server_url+'foo.txt',
+...                 md5('This is a foo text.').hexdigest())
+
+>>> path = download(server_url+'foo.txt',
+...                 md5('The wrong text.').hexdigest())
+Traceback (most recent call last):
+ChecksumError: MD5 checksum mismatch downloading 'http://localhost/foo.txt'
+
+The error message in the event of an MD5 checksum mismatch for a local file
+reads somewhat differently:
+
+>>> path = download(join(server_data, 'foo.txt'),
+...                 md5('This is a foo text.').hexdigest())
+
+>>> path = download(join(server_data, 'foo.txt'),
+...                 md5('The wrong text.').hexdigest())
+Traceback (most recent call last):
+ChecksumError: MD5 checksum mismatch for local resource at '/sample_files/foo.txt'.
+
+Finally, we can download the file to a specified place in the file system:
+
+>>> target_dir = tmpdir('download-target')
+>>> path = download(server_url+'foo.txt',
+...                 path=join(target_dir, 'downloaded.txt'))
+>>> print path
+/download-target/downloaded.txt
+>>> cat(path)
+This is a foo text.
+
+Trying to download a file in offline mode will result in an error:
+
+>>> download = Download(cache=None, offline=True)
+>>> download(server_url+'foo.txt')
+Traceback (most recent call last):
+UserError: Couldn't download 'http://localhost/foo.txt' in offline mode.
+
+As an exception to this rule, file system paths and URLs in the ``file``
+scheme will still work:
+
+>>> cat(download(join(server_data, 'foo.txt')))
+This is a foo text.
+>>> cat(download('file://%s/foo.txt' % server_data))
+This is a foo text.
+
+>>> remove(path)
+
+Downloading using the download cache
+------------------------------------
+
+In order to make use of the download cache, we need to configure the download
+utility differently. To do this, we pass a directory path as the ``cache``
+argument upon instantiation:
+
+>>> cache = tmpdir('download-cache')
+>>> download = Download(cache=cache)
+>>> print download.cache_dir
+/download-cache/
+
+Simple usage
+~~~~~~~~~~~~
+
+When using the cache, a file will be stored in the cache directory when it is
+first downloaded. The file system path returned by the download utility points
+to the cached copy:
+
+>>> ls(cache)
+>>> path = download(server_url+'foo.txt')
+>>> print path
+/download-cache/foo.txt
+>>> cat(path)
+This is a foo text.
+
+Whenever the file is downloaded again, the cached copy is used. Let's change
+the file on the server to see this:
+
+>>> write(server_data, 'foo.txt', 'The wrong text.')
+>>> path = download(server_url+'foo.txt')
+>>> print path
+/download-cache/foo.txt
+>>> cat(path)
+This is a foo text.
+
+If we specify an MD5 checksum for a file that is already in the cache, the
+cached copy's checksum will be verified:
+
+>>> path = download(server_url+'foo.txt', md5('The wrong text.').hexdigest())
+Traceback (most recent call last):
+ChecksumError: MD5 checksum mismatch for cached download
+               from 'http://localhost/foo.txt' at '/download-cache/foo.txt'
+
+Trying to access another file at a different URL which has the same base name
+will result in the cached copy being used:
+
+>>> mkdir(server_data, 'other')
+>>> write(server_data, 'other', 'foo.txt', 'The wrong text.')
+>>> path = download(server_url+'other/foo.txt')
+>>> print path
+/download-cache/foo.txt
+>>> cat(path)
+This is a foo text.
+
+Given a target path for the download, the utility will provide a copy of the
+file at that location both when first downloading the file and when using a
+cached copy:
+
+>>> remove(cache, 'foo.txt')
+>>> ls(cache)
+>>> write(server_data, 'foo.txt', 'This is a foo text.')
+
+>>> path = download(server_url+'foo.txt',
+...                 path=join(target_dir, 'downloaded.txt'))
+>>> print path
+/download-target/downloaded.txt
+>>> cat(path)
+This is a foo text.
+>>> ls(cache)
+- foo.txt
+
+>>> remove(path)
+>>> write(server_data, 'foo.txt', 'The wrong text.')
+
+>>> path = download(server_url+'foo.txt',
+...                 path=join(target_dir, 'downloaded.txt'))
+>>> print path
+/download-target/downloaded.txt
+>>> cat(path)
+This is a foo text.
+
+In offline mode, downloads from any URL will be successful if the file is
+found in the cache:
+
+>>> download = Download(cache=cache, offline=True)
+>>> cat(download(server_url+'foo.txt'))
+This is a foo text.
+
+Local resources will be cached just like any others since download caches are
+sometimes used to create source distributions:
+
+>>> remove(cache, 'foo.txt')
+>>> ls(cache)
+
+>>> write(server_data, 'foo.txt', 'This is a foo text.')
+>>> download = Download(cache=cache)
+
+>>> cat(download('file://' + join(server_data, 'foo.txt'), path=path))
+This is a foo text.
+>>> ls(cache)
+- foo.txt
+
+>>> remove(cache, 'foo.txt')
+
+>>> cat(download(join(server_data, 'foo.txt'), path=path))
+This is a foo text.
+>>> ls(cache)
+- foo.txt
+
+>>> remove(cache, 'foo.txt')
+
+However, resources with checksum mismatches will not be copied to the cache:
+
+>>> download(server_url+'foo.txt', md5('The wrong text.').hexdigest())
+Traceback (most recent call last):
+ChecksumError: MD5 checksum mismatch downloading 'http://localhost/foo.txt'
+>>> ls(cache)
+
+>>> remove(path)
+
+Using namespace sub-directories of the download cache
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is common to store cached copies of downloaded files within sub-directories
+of the download cache to keep some degree of order. For example, zc.buildout
+stores downloaded distributions in a sub-directory named "dist". Those
+sub-directories are also known as namespaces. So far, we haven't specified any
+namespaces to use, so the download utility stored files directly inside the
+download cache. Let's use a namespace "test" instead:
+
+>>> download = Download(cache=cache, namespace='test')
+>>> print download.cache_dir
+/download-cache/test
+
+The namespace sub-directory hasn't been created yet:
+
+>>> ls(cache)
+
+Downloading a file now creates the namespace sub-directory and places a copy
+of the file inside it:
+
+>>> path = download(server_url+'foo.txt')
+>>> print path
+/download-cache/test/foo.txt
+>>> ls(cache)
+d test
+>>> ls(cache, 'test')
+- foo.txt
+>>> cat(path)
+This is a foo text.
+
+The next time we want to download that file, the copy from inside the cache
+namespace is used. To see this clearly, we put a file with the same name but
+different content both on the server and in the cache's root directory:
+
+>>> write(server_data, 'foo.txt', 'The wrong text.')
+>>> write(cache, 'foo.txt', 'The wrong text.')
+
+>>> path = download(server_url+'foo.txt')
+>>> print path
+/download-cache/test/foo.txt
+>>> cat(path)
+This is a foo text.
+
+>>> rmdir(cache, 'test')
+>>> remove(cache, 'foo.txt')
+>>> write(server_data, 'foo.txt', 'This is a foo text.')
+
+Using a hash of the URL as the filename in the cache
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+So far, the base name of the downloaded file read from the URL has been used
+for the name of the cached copy of the file. This may not be desirable in some
+cases, for example when downloading files from different locations that have
+the same base name due to some naming convention, or if the file content
+depends on URL parameters. In such cases, an MD5 hash of the complete URL may
+be used as the filename in the cache:
+
+>>> download = Download(cache=cache, hash_name=True)
+>>> path = download(server_url+'foo.txt')
+>>> print path
+/download-cache/09f5793fcdc1716727f72d49519c688d
+>>> cat(path)
+This is a foo text.
+>>> ls(cache)
+- 09f5793fcdc1716727f72d49519c688d
+
+The path was printed just to illustrate matters; we cannot know the real
+checksum since we don't know which port the server happens to listen at when
+the test is run, so we don't actually know the full URL of the file. Let's
+check that the checksum actually belongs to the particular URL used:
+
+>>> path == join(cache, md5(server_url+'foo.txt').hexdigest())
+True
+
+The cached copy is used when downloading the file again:
+
+>>> write(server_data, 'foo.txt', 'The wrong text.')
+>>> path == download(server_url+'foo.txt')
+True
+>>> cat(path)
+This is a foo text.
+>>> ls(cache)
+- 09f5793fcdc1716727f72d49519c688d
+
+If we change the URL, even in such a way that it keeps the base name of the
+file the same, the file will be downloaded again this time and put in the
+cache under a different name:
+
+>>> path2 = download(server_url+'other/foo.txt')
+>>> print path2
+/download-cache/537b6d73267f8f4447586989af8c470e
+>>> path == path2
+False
+>>> path2 == join(cache, md5(server_url+'other/foo.txt').hexdigest())
+True
+>>> cat(path)
+This is a foo text.
+>>> cat(path2)
+The wrong text.
+>>> ls(cache)
+- 09f5793fcdc1716727f72d49519c688d
+- 537b6d73267f8f4447586989af8c470e
+
+>>> remove(path)
+>>> remove(path2)
+>>> write(server_data, 'foo.txt', 'This is a foo text.')
+
+
+Using the cache purely as a fall-back
+-------------------------------------
+
+Sometimes it is desirable to try downloading a file from the net if at all
+possible, and use the cache purely as a fall-back option when a server is
+down or if we are in offline mode. This mode is only in effect if a download
+cache is configured in the first place:
+
+>>> download = Download(cache=cache, fallback=True)
+>>> print download.cache_dir
+/download-cache/
+
+A downloaded file will be cached:
+
+>>> ls(cache)
+>>> path = download(server_url+'foo.txt')
+>>> ls(cache)
+- foo.txt
+>>> cat(cache, 'foo.txt')
+This is a foo text.
+
+If the file cannot be served, the cached copy will be used:
+
+>>> remove(server_data, 'foo.txt')
+>>> Download()(server_url+'foo.txt')
+Traceback (most recent call last):
+IOError: ('http error', 404, 'Not Found',
+          <httplib.HTTPMessage instance at 0xa35d36c>)
+>>> path = download(server_url+'foo.txt')
+>>> cat(path)
+This is a foo text.
+
+Similarly, if the file is served but we're in offline mode, we'll fall back to
+using the cache:
+
+>>> write(server_data, 'foo.txt', 'The wrong text.')
+>>> cat(Download()(server_url+'foo.txt'))
+The wrong text.
+>>> offline_download = Download(cache=cache, offline=True, fallback=True)
+>>> path = offline_download(server_url+'foo.txt')
+>>> cat(path)
+This is a foo text.
+
+However, when downloading the file normally with the cache being used in
+fall-back mode, the file will be downloaded from the net and the cached copy
+will be replaced with the new content:
+
+>>> path = download(server_url+'foo.txt')
+>>> cat(path)
+The wrong text.
+>>> cat(cache, 'foo.txt')
+The wrong text.
+
+When trying to download a resource whose checksum does not match, the cached
+copy will neither be used nor overwritten:
+
+>>> write(server_data, 'foo.txt', 'This is a foo text.')
+>>> download(server_url+'foo.txt', md5('The wrong text.').hexdigest())
+Traceback (most recent call last):
+ChecksumError: MD5 checksum mismatch downloading 'http://localhost/foo.txt'
+>>> cat(cache, 'foo.txt')
+The wrong text.
+
+
+Configuring the download utility from buildout options
+------------------------------------------------------
+
+The configuration options explained so far derive from the build logic
+implemented by the calling code. Other options configure the download utility
+for use in a particular project or buildout run; they are read from the
+``buildout`` configuration section. The latter can be passed directly as the
+first argument to the download utility's constructor.
+
+The location of the download cache is specified by the ``download-cache``
+option:
+
+>>> download = Download({'download-cache': cache}, namespace='cmmi')
+>>> print download.cache_dir
+/download-cache/cmmi
+
+Keyword parameters take precedence over the corresponding options:
+
+>>> download = Download({'download-cache': cache}, cache=None)
+>>> print download.cache_dir
+None
+
+Whether to assume offline mode can be inferred from either the ``offline`` or
+the ``install-from-cache`` option. As usual with zc.buildout, these options
+must assume one of the values 'true' or 'false':
+
+>>> download = Download({'offline': 'true'})
+>>> download.offline
+True
+
+>>> download = Download({'offline': 'false'})
+>>> download.offline
+False
+
+>>> download = Download({'install-from-cache': 'true'})
+>>> download.offline
+True
+
+>>> download = Download({'install-from-cache': 'false'})
+>>> download.offline
+False
+
+These two options are combined using logical 'or':
+
+>>> download = Download({'offline': 'true', 'install-from-cache': 'false'})
+>>> download.offline
+True
+
+>>> download = Download({'offline': 'false', 'install-from-cache': 'true'})
+>>> download.offline
+True
+
+The ``offline`` keyword parameter takes precedence over both the ``offline``
+and ``install-from-cache`` options:
+
+>>> download = Download({'offline': 'true'}, offline=False)
+>>> download.offline
+False
+
+>>> download = Download({'install-from-cache': 'false'}, offline=True)
+>>> download.offline
+True

Copied: zc.buildout/trunk/src/zc/buildout/extends-cache.txt (from rev 102041, zc.buildout/branches/tlotze-download-api/src/zc/buildout/extends-cache.txt)
===================================================================
--- zc.buildout/trunk/src/zc/buildout/extends-cache.txt	                        (rev 0)
+++ zc.buildout/trunk/src/zc/buildout/extends-cache.txt	2009-07-21 11:37:27 UTC (rev 102051)
@@ -0,0 +1,377 @@
+Caching extended configuration
+==============================
+
+As mentioned in the general buildout documentation, configuration files can
+extend each other, and a configuration file being extended may be downloaded
+from a URL. If desired, zc.buildout caches downloaded configuration in order
+to be able to use it when run offline.
+
+As we're going to talk about downloading things, let's start an HTTP server.
+Also, all of the following will take place inside the sample buildout.
+
+>>> server_data = tmpdir('server_data')
+>>> server_url = start_server(server_data)
+>>> cd(sample_buildout)
+
+
+Basic use of the extends cache
+------------------------------
+
+We put some base configuration on a server and reference it from a sample
+buildout:
+
+>>> write(server_data, 'base.cfg', """\
+... [buildout]
+... parts =
+... foo = bar
+... """)
+
+>>> write('buildout.cfg', """\
+... [buildout]
+... extends = %sbase.cfg
+... """ % server_url)
+
+When trying to run this buildout offline, we'll find that we cannot read all
+of the required configuration:
+
+>>> print system(buildout + ' -o')
+While:
+  Initializing.
+Error: Couldn't download 'http://localhost/base.cfg' in offline mode.
+
+Trying the same online, we can:
+
+>>> print system(buildout)
+Unused options for buildout: 'foo'.
+
+As long as we haven't said anything about caching downloaded configuration,
+nothing gets cached. Offline mode will still cause the buildout to fail:
+
+>>> print system(buildout + ' -o')
+While:
+  Initializing.
+Error: Couldn't download 'http://localhost/base.cfg' in offline mode.
+
+Let's now specify a cache for base configuration files. This cache is
+different from the download cache used by recipes for caching distributions
+and other files; one might, however, use a namespace subdirectory of the
+download cache for it. The configuration cache we specify will be created when
+running buildout and the base.cfg file will be put in it (with the file name
+being a hash of the complete URL):
+
+>>> write('buildout.cfg', """\
+... [buildout]
+... extends = %sbase.cfg
+... extends-cache = cache
+... """ % server_url)
+
+>>> print system(buildout)
+Unused options for buildout: 'foo'.
+
+>>> cache = join(sample_buildout, 'cache')
+>>> ls(cache)
+-  5aedc98d7e769290a29d654a591a3a45
+
+>>> import os
+>>> cat(cache, os.listdir(cache)[0])
+[buildout]
+parts =
+foo = bar
+
+We can now run buildout offline as it will read base.cfg from the cache:
+
+>>> print system(buildout + ' -o')
+Unused options for buildout: 'foo'.
+
+The cache is being used purely as a fall-back in case we are offline or don't
+have access to a configuration file to be downloaded. As long as we are
+online, buildout attempts to download a fresh copy of each file even if a
+cached copy of the file exists. To see this, we put different configuration in
+the same place on the server and run buildout in offline mode so it takes
+base.cfg from the cache:
+
+>>> write(server_data, 'base.cfg', """\
+... [buildout]
+... parts =
+... bar = baz
+... """)
+
+>>> print system(buildout + ' -o')
+Unused options for buildout: 'foo'.
+
+In online mode, buildout will download and use the modified version:
+
+>>> print system(buildout)
+Unused options for buildout: 'bar'.
+
+Trying offline mode again, the new version will be used as it has been put in
+the cache now:
+
+>>> print system(buildout + ' -o')
+Unused options for buildout: 'bar'.
+
+Clean up:
+
+>>> rmdir(cache)
+
+
+Specifying extends cache and offline mode
+-----------------------------------------
+
+Normally, the values of buildout options such as the location of a download
+cache or whether to use offline mode are determined by first reading the
+user's default configuration, updating it with the project's configuration and
+finally applying command-line options. User and project configuration are
+assembled by reading a file such as ``~/.buildout/default.cfg``,
+``buildout.cfg`` or a URL given on the command line, recursively (depth-first)
+downloading any base configuration specified by the ``buildout:extends``
+option read from each of those config files, and finally evaluating each
+config file to provide default values for options not yet read.
+
+This works fine for all options that do not influence how configuration is
+downloaded in the first place. The ``extends-cache`` and ``offline`` options,
+however, are treated differently from the procedure described in order to make
+it simple and obvious to see where a particular configuration file came from
+under any particular circumstances.
+
+- Offline and extends-cache settings are read from the two root config files
+  exclusively. Otherwise one could construct configuration files that, when
+  read, imply that they should have been read from a different source than
+  they have. Also, specifying the extends cache within a file that might have
+  to be taken from the cache before being read wouldn't make a lot of sense.
+
+- Offline and extends-cache settings given by the user's defaults apply to the
+  process of assembling the project's configuration. If no extends cache has
+  been specified by the user's default configuration, the project's root
+  config file must be available, be it from disk or from the net.
+
+- Offline mode turned on by the ``-o`` command line option is honoured from
+  the beginning even though command line options are applied to the
+  configuration last. If offline mode is not requested by the command line, it
+  may be switched on by either the user's or the project's config root.
+
+Extends cache
+~~~~~~~~~~~~~
+
+Let's see the above rules in action. We create a new home directory for our
+user and write user and project configuration that recursively extends online
+bases, using different caches:
+
+>>> mkdir('home')
+>>> mkdir('home', '.buildout')
+>>> os.environ['HOME'] = join(sample_buildout, 'home')
+>>> write('home', '.buildout', 'default.cfg', """\
+... [buildout]
+... extends = fancy_default.cfg
+... extends-cache = user-cache
+... """)
+>>> write('home', '.buildout', 'fancy_default.cfg', """\
+... [buildout]
+... extends = %sbase_default.cfg
+... """ % server_url)
+>>> write(server_data, 'base_default.cfg', """\
+... [buildout]
+... foo = bar
+... offline = false
+... """)
+
+>>> write('buildout.cfg', """\
+... [buildout]
+... extends = fancy.cfg
+... extends-cache = cache
+... """)
+>>> write('fancy.cfg', """\
+... [buildout]
+... extends = %sbase.cfg
+... """ % server_url)
+>>> write(server_data, 'base.cfg', """\
+... [buildout]
+... parts =
+... offline = false
+... """)
+
+Buildout will now assemble its configuration from all of these 6 files,
+defaults first. The online resources end up in the respective extends caches:
+
+>>> print system(buildout)
+Unused options for buildout: 'foo'.
+
+>>> ls('user-cache')
+-  10e772cf422123ef6c64ae770f555740
+>>> cat('user-cache', os.listdir('user-cache')[0])
+[buildout]
+foo = bar
+offline = false
+
+>>> ls('cache')
+-  c72213127e6eb2208a3e1fc1dba771a7
+>>> cat('cache', os.listdir('cache')[0])
+[buildout]
+parts =
+offline = false
+
+If, on the other hand, the extends caches are specified in files that get
+extended themselves, they won't be used for assembling the configuration they
+belong to (user's or project's, respectively). The extends cache specified by
+the user's defaults does, however, apply to downloading project
+configuration. Let's rewrite the config files, clean out the caches and re-run
+buildout:
+
+>>> write('home', '.buildout', 'default.cfg', """\
+... [buildout]
+... extends = fancy_default.cfg
+... """)
+>>> write('home', '.buildout', 'fancy_default.cfg', """\
+... [buildout]
+... extends = %sbase_default.cfg
+... extends-cache = user-cache
+... """ % server_url)
+
+>>> write('buildout.cfg', """\
+... [buildout]
+... extends = fancy.cfg
+... """)
+>>> write('fancy.cfg', """\
+... [buildout]
+... extends = %sbase.cfg
+... extends-cache = cache
+... """ % server_url)
+
+>>> remove('user-cache', os.listdir('user-cache')[0])
+>>> remove('cache', os.listdir('cache')[0])
+
+>>> print system(buildout)
+Unused options for buildout: 'foo'.
+
+>>> ls('user-cache')
+-  0548bad6002359532de37385bb532e26
+>>> cat('user-cache', os.listdir('user-cache')[0])
+[buildout]
+parts =
+offline = false
+
+>>> ls('cache')
+
+Clean up:
+
+>>> rmdir('user-cache')
+>>> rmdir('cache')
+
+Offline mode and installation from cache
+----------------------------~~~~~~~~~~~~
+
+If we run buildout in offline mode now, it will fail because it cannot get at
+the remote configuration file needed by the user's defaults:
+
+>>> print system(buildout + ' -o')
+While:
+  Initializing.
+Error: Couldn't download 'http://localhost/base_default.cfg' in offline mode.
+
+Let's now successively turn on offline mode by different parts of the
+configuration and see when buildout applies this setting in each case:
+
+>>> write('home', '.buildout', 'default.cfg', """\
+... [buildout]
+... extends = fancy_default.cfg
+... offline = true
+... """)
+>>> print system(buildout)
+While:
+  Initializing.
+Error: Couldn't download 'http://localhost/base_default.cfg' in offline mode.
+
+>>> write('home', '.buildout', 'default.cfg', """\
+... [buildout]
+... extends = fancy_default.cfg
+... """)
+>>> write('home', '.buildout', 'fancy_default.cfg', """\
+... [buildout]
+... extends = %sbase_default.cfg
+... offline = true
+... """ % server_url)
+>>> print system(buildout)
+While:
+  Initializing.
+Error: Couldn't download 'http://localhost/base.cfg' in offline mode.
+
+>>> write('home', '.buildout', 'fancy_default.cfg', """\
+... [buildout]
+... extends = %sbase_default.cfg
+... """ % server_url)
+>>> write('buildout.cfg', """\
+... [buildout]
+... extends = fancy.cfg
+... offline = true
+... """)
+>>> print system(buildout)
+While:
+  Initializing.
+Error: Couldn't download 'http://localhost/base.cfg' in offline mode.
+
+>>> write('buildout.cfg', """\
+... [buildout]
+... extends = fancy.cfg
+... """)
+>>> write('fancy.cfg', """\
+... [buildout]
+... extends = %sbase.cfg
+... offline = true
+... """ % server_url)
+>>> print system(buildout)
+Unused options for buildout: 'foo'.
+
+The ``install-from-cache`` option is treated accordingly:
+
+>>> write('home', '.buildout', 'default.cfg', """\
+... [buildout]
+... extends = fancy_default.cfg
+... install-from-cache = true
+... """)
+>>> print system(buildout)
+While:
+  Initializing.
+Error: Couldn't download 'http://localhost/base_default.cfg' in offline mode.
+
+>>> write('home', '.buildout', 'default.cfg', """\
+... [buildout]
+... extends = fancy_default.cfg
+... """)
+>>> write('home', '.buildout', 'fancy_default.cfg', """\
+... [buildout]
+... extends = %sbase_default.cfg
+... install-from-cache = true
+... """ % server_url)
+>>> print system(buildout)
+While:
+  Initializing.
+Error: Couldn't download 'http://localhost/base.cfg' in offline mode.
+
+>>> write('home', '.buildout', 'fancy_default.cfg', """\
+... [buildout]
+... extends = %sbase_default.cfg
+... """ % server_url)
+>>> write('buildout.cfg', """\
+... [buildout]
+... extends = fancy.cfg
+... install-from-cache = true
+... """)
+>>> print system(buildout)
+While:
+  Initializing.
+Error: Couldn't download 'http://localhost/base.cfg' in offline mode.
+
+>>> write('buildout.cfg', """\
+... [buildout]
+... extends = fancy.cfg
+... """)
+>>> write('fancy.cfg', """\
+... [buildout]
+... extends = %sbase.cfg
+... install-from-cache = true
+... """ % server_url)
+>>> print system(buildout)
+While:
+  Installing.
+  Checking for upgrades.
+An internal error occured ...
+ValueError: install_from_cache set to true with no download cache

Modified: zc.buildout/trunk/src/zc/buildout/testing.py
===================================================================
--- zc.buildout/trunk/src/zc/buildout/testing.py	2009-07-20 21:28:23 UTC (rev 102050)
+++ zc.buildout/trunk/src/zc/buildout/testing.py	2009-07-21 11:37:27 UTC (rev 102051)
@@ -1,6 +1,6 @@
 #############################################################################
 #
-# Copyright (c) 2004 Zope Corporation and Contributors.
+# Copyright (c) 2004-2009 Zope Corporation and Contributors.
 # All Rights Reserved.
 #
 # This software is subject to the provisions of the Zope Public License,
@@ -197,7 +197,7 @@
     while time.time() < deadline:
         if func(*args, **kw):
             return
-        time.sleep('.01')
+        time.sleep(0.01)
     raise ValueError('Timed out waiting for: '+label)
 
 def buildoutSetUp(test):

Modified: zc.buildout/trunk/src/zc/buildout/tests.py
===================================================================
--- zc.buildout/trunk/src/zc/buildout/tests.py	2009-07-20 21:28:23 UTC (rev 102050)
+++ zc.buildout/trunk/src/zc/buildout/tests.py	2009-07-21 11:37:27 UTC (rev 102051)
@@ -1,6 +1,6 @@
 ##############################################################################
 #
-# Copyright (c) 2004 Zope Corporation and Contributors.
+# Copyright (c) 2004-2009 Zope Corporation and Contributors.
 # All Rights Reserved.
 #
 # This software is subject to the provisions of the Zope Public License,
@@ -2811,6 +2811,21 @@
                (re.compile(r'\\[\\]?'), '/'),
                ]),
             ),
+
+        doctest.DocFileSuite(
+            'download.txt', 'extends-cache.txt',
+            setUp=easy_install_SetUp,
+            tearDown=zc.buildout.testing.buildoutTearDown,
+            optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS,
+            checker=renormalizing.RENormalizing([
+               (re.compile('0x[0-9a-f]+'), '<MEM ADDRESS>'),
+               (re.compile('http://localhost:[0-9]{4,5}/'),
+                'http://localhost/'),
+               (re.compile('[0-9a-f]{32}'), '<MD5 CHECKSUM>'),
+               zc.buildout.testing.normalize_path,
+               ]),
+            ),
+
         doctest.DocTestSuite(
             setUp=easy_install_SetUp,
             tearDown=zc.buildout.testing.buildoutTearDown,



More information about the Checkins mailing list