Commit b7170549 authored by tlotze

- changed the download API so that downloading a file returns both the local
  path and a flag indicating whether the downloaded copy is a temporary file
- use this flag to clean up temporary files both when downloading extended
  configuration files and in the tests


git-svn-id: http://svn.zope.org/repos/main/zc.buildout/trunk@102708 62d5b8a3-27da-0310-9561-8e5933582275
parent 17f9715c
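
For orientation, the caller-side pattern this commit introduces looks roughly like the
following sketch. The helper name read_extended_config and the surrounding error handling
are illustrative only, not part of the diff; the Download constructor arguments are the
ones used in _open below:

    import os
    import zc.buildout.download

    def read_extended_config(url, dl_options):
        # The download call now returns a (path, is_temp) tuple instead of
        # just a path.
        download = zc.buildout.download.Download(
            dl_options, cache=dl_options.get('extends-cache'),
            fallback=True, hash_name=True)
        path, is_temp = download(url)
        try:
            fp = open(path)
            try:
                return fp.read()
            finally:
                fp.close()
        finally:
            # Only temporary copies are the caller's responsibility to remove;
            # cached copies and explicitly requested target paths stay put.
            if is_temp:
                os.unlink(path)
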
@@ -1238,11 +1238,13 @@ def _open(base, filename, seen, dl_options, override):
"""
_update_section(dl_options, override)
_dl_options = _unannotate_section(dl_options.copy())
is_temp = False
download = zc.buildout.download.Download(
_dl_options, cache=_dl_options.get('extends-cache'), fallback=True,
hash_name=True)
if _isurl(filename):
fp = open(download(filename))
path, is_temp = download(filename)
fp = open(path)
base = filename[:filename.rfind('/')]
elif _isurl(base):
if os.path.isabs(filename):
@@ -1250,7 +1252,8 @@ def _open(base, filename, seen, dl_options, override):
base = os.path.dirname(filename)
else:
filename = base + '/' + filename
fp = open(download(filename))
path, is_temp = download(filename)
fp = open(path)
base = filename[:filename.rfind('/')]
else:
filename = os.path.join(base, filename)
@@ -1258,6 +1261,8 @@ def _open(base, filename, seen, dl_options, override):
base = os.path.dirname(filename)
if filename in seen:
if is_temp:
os.unlink(path)
raise zc.buildout.UserError("Recursive file include", seen, filename)
root_config_file = not seen
@@ -1268,6 +1273,9 @@ def _open(base, filename, seen, dl_options, override):
parser = ConfigParser.RawConfigParser()
parser.optionxform = lambda s: s
parser.readfp(fp)
if is_temp:
os.unlink(path)
extends = extended_by = None
for section in parser.sections():
options = dict(parser.items(section))
......
@@ -84,11 +84,11 @@ class Download(object):
"""
if self.cache:
local_path = self.download_cached(url, md5sum)
local_path, is_temp = self.download_cached(url, md5sum)
else:
local_path = self.download(url, md5sum, path)
local_path, is_temp = self.download(url, md5sum, path)
return locate_at(local_path, path)
return locate_at(local_path, path), is_temp
def download_cached(self, url, md5sum=None):
"""Download a file from a URL using the cache.
@@ -106,9 +106,10 @@ class Download(object):
self.logger.debug('Searching cache at %s' % cache_dir)
if os.path.isfile(cached_path):
is_temp = False
if self.fallback:
try:
self.download(url, md5sum, cached_path)
_, is_temp = self.download(url, md5sum, cached_path)
except ChecksumError:
raise
except Exception:
@@ -122,9 +123,9 @@ class Download(object):
else:
self.logger.debug('Cache miss; will cache %s as %s' %
(url, cached_path))
self.download(url, md5sum, cached_path)
_, is_temp = self.download(url, md5sum, cached_path)
return cached_path
return cached_path, is_temp
def download(self, url, md5sum=None, path=None):
"""Download a file from a URL to a given or temporary path.
@@ -143,7 +144,7 @@ class Download(object):
raise ChecksumError(
'MD5 checksum mismatch for local resource at %r.' %
url_path)
return locate_at(url_path, path)
return locate_at(url_path, path), False
if self.offline:
raise zc.buildout.UserError(
@@ -152,18 +153,23 @@ class Download(object):
self.logger.info('Downloading %s' % url)
urllib._urlopener = url_opener
handle, tmp_path = tempfile.mkstemp(prefix='buildout-')
tmp_path, headers = urllib.urlretrieve(url, tmp_path)
os.close(handle)
if not check_md5sum(tmp_path, md5sum):
os.remove(tmp_path)
raise ChecksumError(
'MD5 checksum mismatch downloading %r' % url)
try:
try:
tmp_path, headers = urllib.urlretrieve(url, tmp_path)
if not check_md5sum(tmp_path, md5sum):
raise ChecksumError(
'MD5 checksum mismatch downloading %r' % url)
except:
os.remove(tmp_path)
raise
finally:
os.close(handle)
if path:
shutil.move(tmp_path, path)
return path
return path, False
else:
return tmp_path
return tmp_path, True
def filename(self, url):
"""Determine a file name from a URL according to the configuration.
......
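
To summarize the return values of the reworked download.py, a sketch with placeholder
URLs and paths (not taken from the tests): a plain download without a cache hands back a
temporary file, while cached downloads and downloads to an explicit target path return a
persistent location:

    import zc.buildout.download

    # No cache configured: the file lands in a temporary location and
    # is_temp is True, so the caller has to unlink it when done.
    download = zc.buildout.download.Download()
    path, is_temp = download('http://example.com/some.cfg')

    # With a download cache: the returned path points into the cache and
    # is_temp is False.
    cached_download = zc.buildout.download.Download(cache='/path/to/cache')
    path, is_temp = cached_download('http://example.com/some.cfg')

    # With an explicit target path: the copy ends up at that path and
    # is_temp is False.
    path, is_temp = download('http://example.com/some.cfg',
                             path='/path/to/target.cfg')
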
@@ -12,6 +12,13 @@ We setup an HTTP server that provides a file we want to download:
>>> write(server_data, 'foo.txt', 'This is a foo text.')
>>> server_url = start_server(server_data)
We also use a fresh directory for temporary files in order to make sure that
all temporary files have been cleaned up in the end:
>>> import tempfile
>>> old_tempdir = tempfile.tempdir
>>> tempfile.tempdir = tmpdir('tmp')
Downloading without using the cache
-----------------------------------
@@ -25,9 +32,11 @@ without any arguments:
None
Downloading a file is achieved by calling the utility with the URL as an
argument:
argument. A tuple is returned that consists of the path to the downloaded copy
of the file and a boolean value indicating whether this is a temporary file
meant to be cleaned up during the same buildout run:
>>> path = download(server_url+'foo.txt')
>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/.../buildout-...
>>> cat(path)
@@ -36,10 +45,17 @@ This is a foo text.
As we aren't using the download cache and haven't specified a target path
either, the download has ended up in a temporary file:
>>> is_temp
True
>>> import tempfile
>>> path.startswith(tempfile.gettempdir())
True
We are responsible for cleaning up temporary files behind us:
>>> remove(path)
When trying to access a file that doesn't exist, we'll get an exception:
>>> download(server_url+'not-there')
@@ -47,39 +63,51 @@ Traceback (most recent call last):
IOError: ('http error', 404, 'Not Found',
<httplib.HTTPMessage instance at 0xa0ffd2c>)
Downloading a local file doesn't produce a temporary file but simply returns
the local file itself:
>>> download(join(server_data, 'foo.txt'))
('/sample_files/foo.txt', False)
We can also have the downloaded file's MD5 sum checked:
>>> try: from hashlib import md5
... except ImportError: from md5 import new as md5
>>> path = download(server_url+'foo.txt',
... md5('This is a foo text.').hexdigest())
>>> path, is_temp = download(server_url+'foo.txt',
... md5('This is a foo text.').hexdigest())
>>> is_temp
True
>>> remove(path)
>>> path = download(server_url+'foo.txt',
... md5('The wrong text.').hexdigest())
>>> download(server_url+'foo.txt',
... md5('The wrong text.').hexdigest())
Traceback (most recent call last):
ChecksumError: MD5 checksum mismatch downloading 'http://localhost/foo.txt'
The error message in the event of an MD5 checksum mismatch for a local file
reads somewhat differently:
>>> path = download(join(server_data, 'foo.txt'),
... md5('This is a foo text.').hexdigest())
>>> download(join(server_data, 'foo.txt'),
... md5('This is a foo text.').hexdigest())
('/sample_files/foo.txt', False)
>>> path = download(join(server_data, 'foo.txt'),
... md5('The wrong text.').hexdigest())
>>> download(join(server_data, 'foo.txt'),
... md5('The wrong text.').hexdigest())
Traceback (most recent call last):
ChecksumError: MD5 checksum mismatch for local resource at '/sample_files/foo.txt'.
Finally, we can download the file to a specified place in the file system:
>>> target_dir = tmpdir('download-target')
>>> path = download(server_url+'foo.txt',
... path=join(target_dir, 'downloaded.txt'))
>>> path, is_temp = download(server_url+'foo.txt',
... path=join(target_dir, 'downloaded.txt'))
>>> print path
/download-target/downloaded.txt
>>> cat(path)
This is a foo text.
>>> is_temp
False
Trying to download a file in offline mode will result in an error:
@@ -91,13 +119,14 @@ UserError: Couldn't download 'http://localhost/foo.txt' in offline mode.
As an exception to this rule, file system paths and URLs in the ``file``
scheme will still work:
>>> cat(download(join(server_data, 'foo.txt')))
>>> cat(download(join(server_data, 'foo.txt'))[0])
This is a foo text.
>>> cat(download('file://%s/foo.txt' % server_data))
>>> cat(download('file://%s/foo.txt' % server_data)[0])
This is a foo text.
>>> remove(path)
Downloading using the download cache
------------------------------------
@@ -118,17 +147,19 @@ first downloaded. The file system path returned by the download utility points
to the cached copy:
>>> ls(cache)
>>> path = download(server_url+'foo.txt')
>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/download-cache/foo.txt
>>> cat(path)
This is a foo text.
>>> is_temp
False
Whenever the file is downloaded again, the cached copy is used. Let's change
the file on the server to see this:
>>> write(server_data, 'foo.txt', 'The wrong text.')
>>> path = download(server_url+'foo.txt')
>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/download-cache/foo.txt
>>> cat(path)
@@ -137,7 +168,7 @@ This is a foo text.
If we specify an MD5 checksum for a file that is already in the cache, the
cached copy's checksum will be verified:
>>> path = download(server_url+'foo.txt', md5('The wrong text.').hexdigest())
>>> download(server_url+'foo.txt', md5('The wrong text.').hexdigest())
Traceback (most recent call last):
ChecksumError: MD5 checksum mismatch for cached download
from 'http://localhost/foo.txt' at '/download-cache/foo.txt'
@@ -147,7 +178,7 @@ will result in the cached copy being used:
>>> mkdir(server_data, 'other')
>>> write(server_data, 'other', 'foo.txt', 'The wrong text.')
>>> path = download(server_url+'other/foo.txt')
>>> path, is_temp = download(server_url+'other/foo.txt')
>>> print path
/download-cache/foo.txt
>>> cat(path)
@@ -161,30 +192,34 @@ cached copy:
>>> ls(cache)
>>> write(server_data, 'foo.txt', 'This is a foo text.')
>>> path = download(server_url+'foo.txt',
... path=join(target_dir, 'downloaded.txt'))
>>> path, is_temp = download(server_url+'foo.txt',
... path=join(target_dir, 'downloaded.txt'))
>>> print path
/download-target/downloaded.txt
>>> cat(path)
This is a foo text.
>>> is_temp
False
>>> ls(cache)
- foo.txt
>>> remove(path)
>>> write(server_data, 'foo.txt', 'The wrong text.')
>>> path = download(server_url+'foo.txt',
... path=join(target_dir, 'downloaded.txt'))
>>> path, is_temp = download(server_url+'foo.txt',
... path=join(target_dir, 'downloaded.txt'))
>>> print path
/download-target/downloaded.txt
>>> cat(path)
This is a foo text.
>>> is_temp
False
In offline mode, downloads from any URL will be successful if the file is
found in the cache:
>>> download = Download(cache=cache, offline=True)
>>> cat(download(server_url+'foo.txt'))
>>> cat(download(server_url+'foo.txt')[0])
This is a foo text.
Local resources will be cached just like any others since download caches are
@@ -196,14 +231,14 @@ sometimes used to create source distributions:
>>> write(server_data, 'foo.txt', 'This is a foo text.')
>>> download = Download(cache=cache)
>>> cat(download('file://' + join(server_data, 'foo.txt'), path=path))
>>> cat(download('file://' + join(server_data, 'foo.txt'), path=path)[0])
This is a foo text.
>>> ls(cache)
- foo.txt
>>> remove(cache, 'foo.txt')
>>> cat(download(join(server_data, 'foo.txt'), path=path))
>>> cat(download(join(server_data, 'foo.txt'), path=path)[0])
This is a foo text.
>>> ls(cache)
- foo.txt
@@ -240,7 +275,7 @@ The namespace sub-directory hasn't been created yet:
Downloading a file now creates the namespace sub-directory and places a copy
of the file inside it:
>>> path = download(server_url+'foo.txt')
>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/download-cache/test/foo.txt
>>> ls(cache)
@@ -249,6 +284,8 @@ d test
- foo.txt
>>> cat(path)
This is a foo text.
>>> is_temp
False
The next time we want to download that file, the copy from inside the cache
namespace is used. To see this clearly, we put a file with the same name but
@@ -257,7 +294,7 @@ different content both on the server and in the cache's root directory:
>>> write(server_data, 'foo.txt', 'The wrong text.')
>>> write(cache, 'foo.txt', 'The wrong text.')
>>> path = download(server_url+'foo.txt')
>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/download-cache/test/foo.txt
>>> cat(path)
@@ -278,7 +315,7 @@ depends on URL parameters. In such cases, an MD5 hash of the complete URL may
be used as the filename in the cache:
>>> download = Download(cache=cache, hash_name=True)
>>> path = download(server_url+'foo.txt')
>>> path, is_temp = download(server_url+'foo.txt')
>>> print path
/download-cache/09f5793fcdc1716727f72d49519c688d
>>> cat(path)
@@ -297,7 +334,7 @@ True
The cached copy is used when downloading the file again:
>>> write(server_data, 'foo.txt', 'The wrong text.')
>>> path == download(server_url+'foo.txt')
>>> (path, is_temp) == download(server_url+'foo.txt')
True
>>> cat(path)
This is a foo text.
@@ -308,7 +345,7 @@ If we change the URL, even in such a way that it keeps the base name of the
file the same, the file will be downloaded again this time and put in the
cache under a different name:
>>> path2 = download(server_url+'other/foo.txt')
>>> path2, is_temp = download(server_url+'other/foo.txt')
>>> print path2
/download-cache/537b6d73267f8f4447586989af8c470e
>>> path == path2
@@ -343,11 +380,13 @@ cache is configured in the first place:
A downloaded file will be cached:
>>> ls(cache)
>>> path = download(server_url+'foo.txt')
>>> path, is_temp = download(server_url+'foo.txt')
>>> ls(cache)
- foo.txt
>>> cat(cache, 'foo.txt')
This is a foo text.
>>> is_temp
False
If the file cannot be served, the cached copy will be used:
@@ -356,27 +395,33 @@ If the file cannot be served, the cached copy will be used:
Traceback (most recent call last):
IOError: ('http error', 404, 'Not Found',
<httplib.HTTPMessage instance at 0xa35d36c>)
>>> path = download(server_url+'foo.txt')
>>> path, is_temp = download(server_url+'foo.txt')
>>> cat(path)
This is a foo text.
>>> is_temp
False
Similarly, if the file is served but we're in offline mode, we'll fall back to
using the cache:
>>> write(server_data, 'foo.txt', 'The wrong text.')
>>> cat(Download()(server_url+'foo.txt'))
The wrong text.
>>> get(server_url+'foo.txt')
'The wrong text.'
>>> offline_download = Download(cache=cache, offline=True, fallback=True)
>>> path = offline_download(server_url+'foo.txt')
>>> path, is_temp = offline_download(server_url+'foo.txt')
>>> print path
/download-cache/foo.txt
>>> cat(path)
This is a foo text.
>>> is_temp
False
However, when downloading the file normally with the cache being used in
fall-back mode, the file will be downloaded from the net and the cached copy
will be replaced with the new content:
>>> path = download(server_url+'foo.txt')
>>> cat(path)
>>> cat(download(server_url+'foo.txt')[0])
The wrong text.
>>> cat(cache, 'foo.txt')
The wrong text.
@@ -454,3 +499,15 @@ False
>>> download = Download({'install-from-cache': 'false'}, offline=True)
>>> download.offline
True
Clean up
--------
We should have cleaned up all temporary files created by downloading things:
>>> ls(tempfile.tempdir)
Reset the global temporary directory:
>>> tempfile.tempdir = old_tempdir
@@ -13,6 +13,13 @@ Also, all of the following will take place inside the sample buildout.
>>> server_url = start_server(server_data)
>>> cd(sample_buildout)
We also use a fresh directory for temporary files in order to make sure that
all temporary files have been cleaned up in the end:
>>> import tempfile
>>> old_tempdir = tempfile.tempdir
>>> tempfile.tempdir = tmpdir('tmp')
Basic use of the extends cache
------------------------------
@@ -375,3 +382,15 @@ While:
Checking for upgrades.
An internal error occured ...
ValueError: install_from_cache set to true with no download cache
Clean up
--------
We should have cleaned up all temporary files created by downloading things:
>>> ls(tempfile.tempdir)
Reset the global temporary directory:
>>> tempfile.tempdir = old_tempdir