[Checkins] SVN: zc.buildout/trunk/src/zc/buildout/ - changed the download API so that downloading a file returns both the local
Thomas Lotze
tl at gocept.com
Wed Aug 12 08:50:30 EDT 2009
Log message for revision 102708:
- changed the download API so that downloading a file returns both the local
path and a flag indicating whether the downloaded copy is a temporary file
- use this flag to clean up temporary files both when downloading extended
configuration files and in the tests
Changed:
U zc.buildout/trunk/src/zc/buildout/buildout.py
U zc.buildout/trunk/src/zc/buildout/download.py
U zc.buildout/trunk/src/zc/buildout/download.txt
U zc.buildout/trunk/src/zc/buildout/extends-cache.txt
-=-
Modified: zc.buildout/trunk/src/zc/buildout/buildout.py
===================================================================
--- zc.buildout/trunk/src/zc/buildout/buildout.py 2009-08-12 10:48:19 UTC (rev 102707)
+++ zc.buildout/trunk/src/zc/buildout/buildout.py 2009-08-12 12:50:30 UTC (rev 102708)
@@ -1238,11 +1238,13 @@
"""
_update_section(dl_options, override)
_dl_options = _unannotate_section(dl_options.copy())
+ is_temp = False
download = zc.buildout.download.Download(
_dl_options, cache=_dl_options.get('extends-cache'), fallback=True,
hash_name=True)
if _isurl(filename):
- fp = open(download(filename))
+ path, is_temp = download(filename)
+ fp = open(path)
base = filename[:filename.rfind('/')]
elif _isurl(base):
if os.path.isabs(filename):
@@ -1250,7 +1252,8 @@
base = os.path.dirname(filename)
else:
filename = base + '/' + filename
- fp = open(download(filename))
+ path, is_temp = download(filename)
+ fp = open(path)
base = filename[:filename.rfind('/')]
else:
filename = os.path.join(base, filename)
@@ -1258,6 +1261,8 @@
base = os.path.dirname(filename)
if filename in seen:
+ if is_temp:
+ os.unlink(path)
raise zc.buildout.UserError("Recursive file include", seen, filename)
root_config_file = not seen
@@ -1268,6 +1273,9 @@
parser = ConfigParser.RawConfigParser()
parser.optionxform = lambda s: s
parser.readfp(fp)
+ if is_temp:
+ os.unlink(path)
+
extends = extended_by = None
for section in parser.sections():
options = dict(parser.items(section))
Modified: zc.buildout/trunk/src/zc/buildout/download.py
===================================================================
--- zc.buildout/trunk/src/zc/buildout/download.py 2009-08-12 10:48:19 UTC (rev 102707)
+++ zc.buildout/trunk/src/zc/buildout/download.py 2009-08-12 12:50:30 UTC (rev 102708)
@@ -84,11 +84,11 @@
"""
if self.cache:
- local_path = self.download_cached(url, md5sum)
+ local_path, is_temp = self.download_cached(url, md5sum)
else:
- local_path = self.download(url, md5sum, path)
+ local_path, is_temp = self.download(url, md5sum, path)
- return locate_at(local_path, path)
+ return locate_at(local_path, path), is_temp
def download_cached(self, url, md5sum=None):
"""Download a file from a URL using the cache.
@@ -106,9 +106,10 @@
self.logger.debug('Searching cache at %s' % cache_dir)
if os.path.isfile(cached_path):
+ is_temp = False
if self.fallback:
try:
- self.download(url, md5sum, cached_path)
+ _, is_temp = self.download(url, md5sum, cached_path)
except ChecksumError:
raise
except Exception:
@@ -122,9 +123,9 @@
else:
self.logger.debug('Cache miss; will cache %s as %s' %
(url, cached_path))
- self.download(url, md5sum, cached_path)
+ _, is_temp = self.download(url, md5sum, cached_path)
- return cached_path
+ return cached_path, is_temp
def download(self, url, md5sum=None, path=None):
"""Download a file from a URL to a given or temporary path.
@@ -143,7 +144,7 @@
raise ChecksumError(
'MD5 checksum mismatch for local resource at %r.' %
url_path)
- return locate_at(url_path, path)
+ return locate_at(url_path, path), False
if self.offline:
raise zc.buildout.UserError(
@@ -152,18 +153,23 @@
self.logger.info('Downloading %s' % url)
urllib._urlopener = url_opener
handle, tmp_path = tempfile.mkstemp(prefix='buildout-')
- tmp_path, headers = urllib.urlretrieve(url, tmp_path)
- os.close(handle)
- if not check_md5sum(tmp_path, md5sum):
- os.remove(tmp_path)
- raise ChecksumError(
- 'MD5 checksum mismatch downloading %r' % url)
+ try:
+ try:
+ tmp_path, headers = urllib.urlretrieve(url, tmp_path)
+ if not check_md5sum(tmp_path, md5sum):
+ raise ChecksumError(
+ 'MD5 checksum mismatch downloading %r' % url)
+ except:
+ os.remove(tmp_path)
+ raise
+ finally:
+ os.close(handle)
if path:
shutil.move(tmp_path, path)
- return path
+ return path, False
else:
- return tmp_path
+ return tmp_path, True
def filename(self, url):
"""Determine a file name from a URL according to the configuration.
Modified: zc.buildout/trunk/src/zc/buildout/download.txt
===================================================================
--- zc.buildout/trunk/src/zc/buildout/download.txt 2009-08-12 10:48:19 UTC (rev 102707)
+++ zc.buildout/trunk/src/zc/buildout/download.txt 2009-08-12 12:50:30 UTC (rev 102708)
@@ -12,7 +12,14 @@
>>> write(server_data, 'foo.txt', 'This is a foo text.')
>>> server_url = start_server(server_data)
We also use a fresh directory for temporary files in order to make sure that
all temporary files have been cleaned up by the end:
+>>> import tempfile
+>>> old_tempdir = tempfile.tempdir
+>>> tempfile.tempdir = tmpdir('tmp')
+
+
Downloading without using the cache
-----------------------------------
@@ -25,9 +32,11 @@
None
Downloading a file is achieved by calling the utility with the URL as an
-argument:
+argument. A tuple is returned that consists of the path to the downloaded copy
+of the file and a boolean value indicating whether this is a temporary file
+meant to be cleaned up during the same buildout run:
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/.../buildout-...
>>> cat(path)
@@ -36,10 +45,17 @@
As we aren't using the download cache and haven't specified a target path
either, the download has ended up in a temporary file:
+>>> is_temp
+True
+
>>> import tempfile
>>> path.startswith(tempfile.gettempdir())
True
+We are responsible for cleaning up temporary files behind us:
+
+>>> remove(path)
+
When trying to access a file that doesn't exist, we'll get an exception:
>>> download(server_url+'not-there')
@@ -47,39 +63,51 @@
IOError: ('http error', 404, 'Not Found',
<httplib.HTTPMessage instance at 0xa0ffd2c>)
+Downloading a local file doesn't produce a temporary file but simply returns
+the local file itself:
+
+>>> download(join(server_data, 'foo.txt'))
+('/sample_files/foo.txt', False)
+
We can also have the downloaded file's MD5 sum checked:
>>> try: from hashlib import md5
... except ImportError: from md5 import new as md5
->>> path = download(server_url+'foo.txt',
-... md5('This is a foo text.').hexdigest())
+>>> path, is_temp = download(server_url+'foo.txt',
+... md5('This is a foo text.').hexdigest())
+>>> is_temp
+True
+>>> remove(path)
->>> path = download(server_url+'foo.txt',
-... md5('The wrong text.').hexdigest())
+>>> download(server_url+'foo.txt',
+... md5('The wrong text.').hexdigest())
Traceback (most recent call last):
ChecksumError: MD5 checksum mismatch downloading 'http://localhost/foo.txt'
The error message in the event of an MD5 checksum mismatch for a local file
reads somewhat differently:
->>> path = download(join(server_data, 'foo.txt'),
-... md5('This is a foo text.').hexdigest())
+>>> download(join(server_data, 'foo.txt'),
+... md5('This is a foo text.').hexdigest())
+('/sample_files/foo.txt', False)
->>> path = download(join(server_data, 'foo.txt'),
-... md5('The wrong text.').hexdigest())
+>>> download(join(server_data, 'foo.txt'),
+... md5('The wrong text.').hexdigest())
Traceback (most recent call last):
ChecksumError: MD5 checksum mismatch for local resource at '/sample_files/foo.txt'.
Finally, we can download the file to a specified place in the file system:
>>> target_dir = tmpdir('download-target')
->>> path = download(server_url+'foo.txt',
-... path=join(target_dir, 'downloaded.txt'))
+>>> path, is_temp = download(server_url+'foo.txt',
+... path=join(target_dir, 'downloaded.txt'))
>>> print path
/download-target/downloaded.txt
>>> cat(path)
This is a foo text.
+>>> is_temp
+False
Trying to download a file in offline mode will result in an error:
@@ -91,13 +119,14 @@
As an exception to this rule, file system paths and URLs in the ``file``
scheme will still work:
->>> cat(download(join(server_data, 'foo.txt')))
+>>> cat(download(join(server_data, 'foo.txt'))[0])
This is a foo text.
->>> cat(download('file://%s/foo.txt' % server_data))
+>>> cat(download('file://%s/foo.txt' % server_data)[0])
This is a foo text.
>>> remove(path)
+
Downloading using the download cache
------------------------------------
@@ -118,17 +147,19 @@
to the cached copy:
>>> ls(cache)
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/download-cache/foo.txt
>>> cat(path)
This is a foo text.
+>>> is_temp
+False
Whenever the file is downloaded again, the cached copy is used. Let's change
the file on the server to see this:
>>> write(server_data, 'foo.txt', 'The wrong text.')
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/download-cache/foo.txt
>>> cat(path)
@@ -137,7 +168,7 @@
If we specify an MD5 checksum for a file that is already in the cache, the
cached copy's checksum will be verified:
->>> path = download(server_url+'foo.txt', md5('The wrong text.').hexdigest())
+>>> download(server_url+'foo.txt', md5('The wrong text.').hexdigest())
Traceback (most recent call last):
ChecksumError: MD5 checksum mismatch for cached download
from 'http://localhost/foo.txt' at '/download-cache/foo.txt'
@@ -147,7 +178,7 @@
>>> mkdir(server_data, 'other')
>>> write(server_data, 'other', 'foo.txt', 'The wrong text.')
->>> path = download(server_url+'other/foo.txt')
+>>> path, is_temp = download(server_url+'other/foo.txt')
>>> print path
/download-cache/foo.txt
>>> cat(path)
@@ -161,30 +192,34 @@
>>> ls(cache)
>>> write(server_data, 'foo.txt', 'This is a foo text.')
->>> path = download(server_url+'foo.txt',
-... path=join(target_dir, 'downloaded.txt'))
+>>> path, is_temp = download(server_url+'foo.txt',
+... path=join(target_dir, 'downloaded.txt'))
>>> print path
/download-target/downloaded.txt
>>> cat(path)
This is a foo text.
+>>> is_temp
+False
>>> ls(cache)
- foo.txt
>>> remove(path)
>>> write(server_data, 'foo.txt', 'The wrong text.')
->>> path = download(server_url+'foo.txt',
-... path=join(target_dir, 'downloaded.txt'))
+>>> path, is_temp = download(server_url+'foo.txt',
+... path=join(target_dir, 'downloaded.txt'))
>>> print path
/download-target/downloaded.txt
>>> cat(path)
This is a foo text.
+>>> is_temp
+False
In offline mode, downloads from any URL will be successful if the file is
found in the cache:
>>> download = Download(cache=cache, offline=True)
->>> cat(download(server_url+'foo.txt'))
+>>> cat(download(server_url+'foo.txt')[0])
This is a foo text.
Local resources will be cached just like any others since download caches are
@@ -196,14 +231,14 @@
>>> write(server_data, 'foo.txt', 'This is a foo text.')
>>> download = Download(cache=cache)
->>> cat(download('file://' + join(server_data, 'foo.txt'), path=path))
+>>> cat(download('file://' + join(server_data, 'foo.txt'), path=path)[0])
This is a foo text.
>>> ls(cache)
- foo.txt
>>> remove(cache, 'foo.txt')
->>> cat(download(join(server_data, 'foo.txt'), path=path))
+>>> cat(download(join(server_data, 'foo.txt'), path=path)[0])
This is a foo text.
>>> ls(cache)
- foo.txt
@@ -240,7 +275,7 @@
Downloading a file now creates the namespace sub-directory and places a copy
of the file inside it:
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/download-cache/test/foo.txt
>>> ls(cache)
@@ -249,6 +284,8 @@
- foo.txt
>>> cat(path)
This is a foo text.
+>>> is_temp
+False
The next time we want to download that file, the copy from inside the cache
namespace is used. To see this clearly, we put a file with the same name but
@@ -257,7 +294,7 @@
>>> write(server_data, 'foo.txt', 'The wrong text.')
>>> write(cache, 'foo.txt', 'The wrong text.')
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/download-cache/test/foo.txt
>>> cat(path)
@@ -278,7 +315,7 @@
be used as the filename in the cache:
>>> download = Download(cache=cache, hash_name=True)
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/download-cache/09f5793fcdc1716727f72d49519c688d
>>> cat(path)
@@ -297,7 +334,7 @@
The cached copy is used when downloading the file again:
>>> write(server_data, 'foo.txt', 'The wrong text.')
->>> path == download(server_url+'foo.txt')
+>>> (path, is_temp) == download(server_url+'foo.txt')
True
>>> cat(path)
This is a foo text.
@@ -308,7 +345,7 @@
file the same, the file will be downloaded again this time and put in the
cache under a different name:
->>> path2 = download(server_url+'other/foo.txt')
+>>> path2, is_temp = download(server_url+'other/foo.txt')
>>> print path2
/download-cache/537b6d73267f8f4447586989af8c470e
>>> path == path2
@@ -343,11 +380,13 @@
A downloaded file will be cached:
>>> ls(cache)
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
>>> ls(cache)
- foo.txt
>>> cat(cache, 'foo.txt')
This is a foo text.
+>>> is_temp
+False
If the file cannot be served, the cached copy will be used:
@@ -356,27 +395,33 @@
Traceback (most recent call last):
IOError: ('http error', 404, 'Not Found',
<httplib.HTTPMessage instance at 0xa35d36c>)
->>> path = download(server_url+'foo.txt')
+>>> path, is_temp = download(server_url+'foo.txt')
>>> cat(path)
This is a foo text.
+>>> is_temp
+False
Similarly, if the file is served but we're in offline mode, we'll fall back to
using the cache:
>>> write(server_data, 'foo.txt', 'The wrong text.')
->>> cat(Download()(server_url+'foo.txt'))
-The wrong text.
+>>> get(server_url+'foo.txt')
+'The wrong text.'
+
>>> offline_download = Download(cache=cache, offline=True, fallback=True)
->>> path = offline_download(server_url+'foo.txt')
+>>> path, is_temp = offline_download(server_url+'foo.txt')
+>>> print path
+/download-cache/foo.txt
>>> cat(path)
This is a foo text.
+>>> is_temp
+False
However, when downloading the file normally with the cache being used in
fall-back mode, the file will be downloaded from the net and the cached copy
will be replaced with the new content:
->>> path = download(server_url+'foo.txt')
->>> cat(path)
+>>> cat(download(server_url+'foo.txt')[0])
The wrong text.
>>> cat(cache, 'foo.txt')
The wrong text.
@@ -454,3 +499,15 @@
>>> download = Download({'install-from-cache': 'false'}, offline=True)
>>> download.offline
True
+
+
+Clean up
+--------
+
+We should have cleaned up all temporary files created by downloading things:
+
+>>> ls(tempfile.tempdir)
+
+Reset the global temporary directory:
+
+>>> tempfile.tempdir = old_tempdir
Modified: zc.buildout/trunk/src/zc/buildout/extends-cache.txt
===================================================================
--- zc.buildout/trunk/src/zc/buildout/extends-cache.txt 2009-08-12 10:48:19 UTC (rev 102707)
+++ zc.buildout/trunk/src/zc/buildout/extends-cache.txt 2009-08-12 12:50:30 UTC (rev 102708)
@@ -13,7 +13,14 @@
>>> server_url = start_server(server_data)
>>> cd(sample_buildout)
We also use a fresh directory for temporary files in order to make sure that
all temporary files have been cleaned up by the end:
+>>> import tempfile
+>>> old_tempdir = tempfile.tempdir
+>>> tempfile.tempdir = tmpdir('tmp')
+
+
Basic use of the extends cache
------------------------------
@@ -375,3 +382,15 @@
Checking for upgrades.
An internal error occured ...
ValueError: install_from_cache set to true with no download cache
+
+
+Clean up
+--------
+
+We should have cleaned up all temporary files created by downloading things:
+
+>>> ls(tempfile.tempdir)
+
+Reset the global temporary directory:
+
+>>> tempfile.tempdir = old_tempdir
More information about the Checkins
mailing list