[Checkins] SVN: zc.buildout/trunk/src/zc/buildout/ - changed the download API so that downloading a file returns both the local

Thomas Lotze tl at gocept.com
Wed Aug 12 08:50:30 EDT 2009


Log message for revision 102708:
  - changed the download API so that downloading a file returns both the local
    path and a flag indicating whether the downloaded copy is a temporary file
  - use this flag to clean up temporary files both when downloading extended
    configuration files and in the tests
  

Changed:
  U   zc.buildout/trunk/src/zc/buildout/buildout.py
  U   zc.buildout/trunk/src/zc/buildout/download.py
  U   zc.buildout/trunk/src/zc/buildout/download.txt
  U   zc.buildout/trunk/src/zc/buildout/extends-cache.txt

-=-
Modified: zc.buildout/trunk/src/zc/buildout/buildout.py
===================================================================
--- zc.buildout/trunk/src/zc/buildout/buildout.py	2009-08-12 10:48:19 UTC (rev 102707)
+++ zc.buildout/trunk/src/zc/buildout/buildout.py	2009-08-12 12:50:30 UTC (rev 102708)
@@ -1238,11 +1238,13 @@
     """
     _update_section(dl_options, override)
     _dl_options = _unannotate_section(dl_options.copy())
+    is_temp = False
     download = zc.buildout.download.Download(
         _dl_options, cache=_dl_options.get('extends-cache'), fallback=True,
         hash_name=True)
     if _isurl(filename):
-        fp = open(download(filename))
+        path, is_temp = download(filename)
+        fp = open(path)
         base = filename[:filename.rfind('/')]
     elif _isurl(base):
         if os.path.isabs(filename):
@@ -1250,7 +1252,8 @@
             base = os.path.dirname(filename)
         else:
             filename = base + '/' + filename
-            fp = open(download(filename))
+            path, is_temp = download(filename)
+            fp = open(path)
             base = filename[:filename.rfind('/')]
     else:
         filename = os.path.join(base, filename)
@@ -1258,6 +1261,8 @@
         base = os.path.dirname(filename)
 
     if filename in seen:
+        if is_temp:
+            os.unlink(path)
         raise zc.buildout.UserError("Recursive file include", seen, filename)
 
     root_config_file = not seen
@@ -1268,6 +1273,9 @@
     parser = ConfigParser.RawConfigParser()
     parser.optionxform = lambda s: s
     parser.readfp(fp)
+    if is_temp:
+        os.unlink(path)
+
     extends = extended_by = None
     for section in parser.sections():
         options = dict(parser.items(section))

Modified: zc.buildout/trunk/src/zc/buildout/download.py
===================================================================
--- zc.buildout/trunk/src/zc/buildout/download.py	2009-08-12 10:48:19 UTC (rev 102707)
+++ zc.buildout/trunk/src/zc/buildout/download.py	2009-08-12 12:50:30 UTC (rev 102708)
@@ -84,11 +84,11 @@
 
         """
         if self.cache:
-            local_path = self.download_cached(url, md5sum)
+            local_path, is_temp = self.download_cached(url, md5sum)
         else:
-            local_path = self.download(url, md5sum, path)
+            local_path, is_temp = self.download(url, md5sum, path)
 
-        return locate_at(local_path, path)
+        return locate_at(local_path, path), is_temp
 
     def download_cached(self, url, md5sum=None):
         """Download a file from a URL using the cache.
@@ -106,9 +106,10 @@
 
         self.logger.debug('Searching cache at %s' % cache_dir)
         if os.path.isfile(cached_path):
+            is_temp = False
             if self.fallback:
                 try:
-                    self.download(url, md5sum, cached_path)
+                    _, is_temp = self.download(url, md5sum, cached_path)
                 except ChecksumError:
                     raise
                 except Exception:
@@ -122,9 +123,9 @@
         else:
             self.logger.debug('Cache miss; will cache %s as %s' %
                               (url, cached_path))
-            self.download(url, md5sum, cached_path)
+            _, is_temp = self.download(url, md5sum, cached_path)
 
-        return cached_path
+        return cached_path, is_temp
 
     def download(self, url, md5sum=None, path=None):
         """Download a file from a URL to a given or temporary path.
@@ -143,7 +144,7 @@
                 raise ChecksumError(
                     'MD5 checksum mismatch for local resource at %r.' %
                     url_path)
-            return locate_at(url_path, path)
+            return locate_at(url_path, path), False
 
         if self.offline:
             raise zc.buildout.UserError(
@@ -152,18 +153,23 @@
         self.logger.info('Downloading %s' % url)
         urllib._urlopener = url_opener
         handle, tmp_path = tempfile.mkstemp(prefix='buildout-')
-        tmp_path, headers = urllib.urlretrieve(url, tmp_path)
-        os.close(handle)
-        if not check_md5sum(tmp_path, md5sum):
-            os.remove(tmp_path)
-            raise ChecksumError(
-                'MD5 checksum mismatch downloading %r' % url)
+        try:
+            try:
+                tmp_path, headers = urllib.urlretrieve(url, tmp_path)
+                if not check_md5sum(tmp_path, md5sum):
+                    raise ChecksumError(
+                        'MD5 checksum mismatch downloading %r' % url)
+            except:
+                os.remove(tmp_path)
+                raise
+        finally:
+            os.close(handle)
 
         if path:
             shutil.move(tmp_path, path)
-            return path
+            return path, False
         else:
-            return tmp_path
+            return tmp_path, True
 
     def filename(self, url):
         """Determine a file name from a URL according to the configuration.

Modified: zc.buildout/trunk/src/zc/buildout/download.txt
===================================================================
--- zc.buildout/trunk/src/zc/buildout/download.txt	2009-08-12 10:48:19 UTC (rev 102707)
+++ zc.buildout/trunk/src/zc/buildout/download.txt	2009-08-12 12:50:30 UTC (rev 102708)
@@ -12,7 +12,14 @@
 >>> write(server_data, 'foo.txt', 'This is a foo text.')
 >>> server_url = start_server(server_data)
 
+We also use a fresh directory for temporary files in order to make sure that
+all temporary files have been cleaned up in the end:
 
+>>> import tempfile
+>>> old_tempdir = tempfile.tempdir
+>>> tempfile.tempdir = tmpdir('tmp')
+
+
 Downloading without using the cache
 -----------------------------------
 
@@ -25,9 +32,11 @@
 None
 
 Downloading a file is achieved by calling the utility with the URL as an
-argument:
+argument. A tuple is returned that consists of the path to the downloaded copy
+of the file and a boolean value indicating whether this is a temporary file
+meant to be cleaned up during the same buildout run:
 
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
 >>> print path
 /.../buildout-...
 >>> cat(path)
@@ -36,10 +45,17 @@
 As we aren't using the download cache and haven't specified a target path
 either, the download has ended up in a temporary file:
 
+>>> is_temp
+True
+
 >>> import tempfile
 >>> path.startswith(tempfile.gettempdir())
 True
 
+We are responsible for cleaning up temporary files behind us:
+
+>>> remove(path)
+
 When trying to access a file that doesn't exist, we'll get an exception:
 
 >>> download(server_url+'not-there')
@@ -47,39 +63,51 @@
 IOError: ('http error', 404, 'Not Found',
           <httplib.HTTPMessage instance at 0xa0ffd2c>)
 
+Downloading a local file doesn't produce a temporary file but simply returns
+the local file itself:
+
+>>> download(join(server_data, 'foo.txt'))
+('/sample_files/foo.txt', False)
+
 We can also have the downloaded file's MD5 sum checked:
 
 >>> try: from hashlib import md5
 ... except ImportError: from md5 import new as md5
 
->>> path = download(server_url+'foo.txt',
-...                 md5('This is a foo text.').hexdigest())
+>>> path, is_temp = download(server_url+'foo.txt',
+...                          md5('This is a foo text.').hexdigest())
+>>> is_temp
+True
+>>> remove(path)
 
->>> path = download(server_url+'foo.txt',
-...                 md5('The wrong text.').hexdigest())
+>>> download(server_url+'foo.txt',
+...          md5('The wrong text.').hexdigest())
 Traceback (most recent call last):
 ChecksumError: MD5 checksum mismatch downloading 'http://localhost/foo.txt'
 
 The error message in the event of an MD5 checksum mismatch for a local file
 reads somewhat differently:
 
->>> path = download(join(server_data, 'foo.txt'),
-...                 md5('This is a foo text.').hexdigest())
+>>> download(join(server_data, 'foo.txt'),
+...               md5('This is a foo text.').hexdigest())
+('/sample_files/foo.txt', False)
 
->>> path = download(join(server_data, 'foo.txt'),
-...                 md5('The wrong text.').hexdigest())
+>>> download(join(server_data, 'foo.txt'),
+...          md5('The wrong text.').hexdigest())
 Traceback (most recent call last):
 ChecksumError: MD5 checksum mismatch for local resource at '/sample_files/foo.txt'.
 
 Finally, we can download the file to a specified place in the file system:
 
 >>> target_dir = tmpdir('download-target')
->>> path = download(server_url+'foo.txt',
-...                 path=join(target_dir, 'downloaded.txt'))
+>>> path, is_temp = download(server_url+'foo.txt',
+...                          path=join(target_dir, 'downloaded.txt'))
 >>> print path
 /download-target/downloaded.txt
 >>> cat(path)
 This is a foo text.
+>>> is_temp
+False
 
 Trying to download a file in offline mode will result in an error:
 
@@ -91,13 +119,14 @@
 As an exception to this rule, file system paths and URLs in the ``file``
 scheme will still work:
 
->>> cat(download(join(server_data, 'foo.txt')))
+>>> cat(download(join(server_data, 'foo.txt'))[0])
 This is a foo text.
->>> cat(download('file://%s/foo.txt' % server_data))
+>>> cat(download('file://%s/foo.txt' % server_data)[0])
 This is a foo text.
 
 >>> remove(path)
 
+
 Downloading using the download cache
 ------------------------------------
 
@@ -118,17 +147,19 @@
 to the cached copy:
 
 >>> ls(cache)
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
 >>> print path
 /download-cache/foo.txt
 >>> cat(path)
 This is a foo text.
+>>> is_temp
+False
 
 Whenever the file is downloaded again, the cached copy is used. Let's change
 the file on the server to see this:
 
 >>> write(server_data, 'foo.txt', 'The wrong text.')
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
 >>> print path
 /download-cache/foo.txt
 >>> cat(path)
@@ -137,7 +168,7 @@
 If we specify an MD5 checksum for a file that is already in the cache, the
 cached copy's checksum will be verified:
 
->>> path = download(server_url+'foo.txt', md5('The wrong text.').hexdigest())
+>>> download(server_url+'foo.txt', md5('The wrong text.').hexdigest())
 Traceback (most recent call last):
 ChecksumError: MD5 checksum mismatch for cached download
                from 'http://localhost/foo.txt' at '/download-cache/foo.txt'
@@ -147,7 +178,7 @@
 
 >>> mkdir(server_data, 'other')
 >>> write(server_data, 'other', 'foo.txt', 'The wrong text.')
->>> path = download(server_url+'other/foo.txt')
+>>> path, is_temp = download(server_url+'other/foo.txt')
 >>> print path
 /download-cache/foo.txt
 >>> cat(path)
@@ -161,30 +192,34 @@
 >>> ls(cache)
 >>> write(server_data, 'foo.txt', 'This is a foo text.')
 
->>> path = download(server_url+'foo.txt',
-...                 path=join(target_dir, 'downloaded.txt'))
+>>> path, is_temp = download(server_url+'foo.txt',
+...                          path=join(target_dir, 'downloaded.txt'))
 >>> print path
 /download-target/downloaded.txt
 >>> cat(path)
 This is a foo text.
+>>> is_temp
+False
 >>> ls(cache)
 - foo.txt
 
 >>> remove(path)
 >>> write(server_data, 'foo.txt', 'The wrong text.')
 
->>> path = download(server_url+'foo.txt',
-...                 path=join(target_dir, 'downloaded.txt'))
+>>> path, is_temp = download(server_url+'foo.txt',
+...                          path=join(target_dir, 'downloaded.txt'))
 >>> print path
 /download-target/downloaded.txt
 >>> cat(path)
 This is a foo text.
+>>> is_temp
+False
 
 In offline mode, downloads from any URL will be successful if the file is
 found in the cache:
 
 >>> download = Download(cache=cache, offline=True)
->>> cat(download(server_url+'foo.txt'))
+>>> cat(download(server_url+'foo.txt')[0])
 This is a foo text.
 
 Local resources will be cached just like any others since download caches are
@@ -196,14 +231,14 @@
 >>> write(server_data, 'foo.txt', 'This is a foo text.')
 >>> download = Download(cache=cache)
 
->>> cat(download('file://' + join(server_data, 'foo.txt'), path=path))
+>>> cat(download('file://' + join(server_data, 'foo.txt'), path=path)[0])
 This is a foo text.
 >>> ls(cache)
 - foo.txt
 
 >>> remove(cache, 'foo.txt')
 
->>> cat(download(join(server_data, 'foo.txt'), path=path))
+>>> cat(download(join(server_data, 'foo.txt'), path=path)[0])
 This is a foo text.
 >>> ls(cache)
 - foo.txt
@@ -240,7 +275,7 @@
 Downloading a file now creates the namespace sub-directory and places a copy
 of the file inside it:
 
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
 >>> print path
 /download-cache/test/foo.txt
 >>> ls(cache)
@@ -249,6 +284,8 @@
 - foo.txt
 >>> cat(path)
 This is a foo text.
+>>> is_temp
+False
 
 The next time we want to download that file, the copy from inside the cache
 namespace is used. To see this clearly, we put a file with the same name but
@@ -257,7 +294,7 @@
 >>> write(server_data, 'foo.txt', 'The wrong text.')
 >>> write(cache, 'foo.txt', 'The wrong text.')
 
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
 >>> print path
 /download-cache/test/foo.txt
 >>> cat(path)
@@ -278,7 +315,7 @@
 be used as the filename in the cache:
 
 >>> download = Download(cache=cache, hash_name=True)
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
 >>> print path
 /download-cache/09f5793fcdc1716727f72d49519c688d
 >>> cat(path)
@@ -297,7 +334,7 @@
 The cached copy is used when downloading the file again:
 
 >>> write(server_data, 'foo.txt', 'The wrong text.')
->>> path == download(server_url+'foo.txt')
+>>> (path, is_temp) == download(server_url+'foo.txt')
 True
 >>> cat(path)
 This is a foo text.
@@ -308,7 +345,7 @@
 file the same, the file will be downloaded again this time and put in the
 cache under a different name:
 
->>> path2 = download(server_url+'other/foo.txt')
+>>> path2, is_temp = download(server_url+'other/foo.txt')
 >>> print path2
 /download-cache/537b6d73267f8f4447586989af8c470e
 >>> path == path2
@@ -343,11 +380,13 @@
 A downloaded file will be cached:
 
 >>> ls(cache)
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
 >>> ls(cache)
 - foo.txt
 >>> cat(cache, 'foo.txt')
 This is a foo text.
+>>> is_temp
+False
 
 If the file cannot be served, the cached copy will be used:
 
@@ -356,27 +395,33 @@
 Traceback (most recent call last):
 IOError: ('http error', 404, 'Not Found',
           <httplib.HTTPMessage instance at 0xa35d36c>)
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
 >>> cat(path)
 This is a foo text.
+>>> is_temp
+False
 
 Similarly, if the file is served but we're in offline mode, we'll fall back to
 using the cache:
 
 >>> write(server_data, 'foo.txt', 'The wrong text.')
->>> cat(Download()(server_url+'foo.txt'))
-The wrong text.
+>>> get(server_url+'foo.txt')
+'The wrong text.'
+
 >>> offline_download = Download(cache=cache, offline=True, fallback=True)
->>> path = offline_download(server_url+'foo.txt')
+>>> path, is_temp = offline_download(server_url+'foo.txt')
+>>> print path
+/download-cache/foo.txt
 >>> cat(path)
 This is a foo text.
+>>> is_temp
+False
 
 However, when downloading the file normally with the cache being used in
 fall-back mode, the file will be downloaded from the net and the cached copy
 will be replaced with the new content:
 
->>> path = download(server_url+'foo.txt')
->>> cat(path)
+>>> cat(download(server_url+'foo.txt')[0])
 The wrong text.
 >>> cat(cache, 'foo.txt')
 The wrong text.
@@ -454,3 +499,15 @@
 >>> download = Download({'install-from-cache': 'false'}, offline=True)
 >>> download.offline
 True
+
+
+Clean up
+--------
+
+We should have cleaned up all temporary files created by downloading things:
+
+>>> ls(tempfile.tempdir)
+
+Reset the global temporary directory:
+
+>>> tempfile.tempdir = old_tempdir

Modified: zc.buildout/trunk/src/zc/buildout/extends-cache.txt
===================================================================
--- zc.buildout/trunk/src/zc/buildout/extends-cache.txt	2009-08-12 10:48:19 UTC (rev 102707)
+++ zc.buildout/trunk/src/zc/buildout/extends-cache.txt	2009-08-12 12:50:30 UTC (rev 102708)
@@ -13,7 +13,14 @@
 >>> server_url = start_server(server_data)
 >>> cd(sample_buildout)
 
+We also use a fresh directory for temporary files in order to make sure that
+all temporary files have been cleaned up in the end:
 
+>>> import tempfile
+>>> old_tempdir = tempfile.tempdir
+>>> tempfile.tempdir = tmpdir('tmp')
+
+
 Basic use of the extends cache
 ------------------------------
 
@@ -375,3 +382,15 @@
   Checking for upgrades.
 An internal error occured ...
 ValueError: install_from_cache set to true with no download cache
+
+
+Clean up
+--------
+
+We should have cleaned up all temporary files created by downloading things:
+
+>>> ls(tempfile.tempdir)
+
+Reset the global temporary directory:
+
+>>> tempfile.tempdir = old_tempdir



More information about the Checkins mailing list