[feat] Extend Download API to use an alternate URL as fallback

This retries with the alternate URL in case of HTTPError with the main one. Used by slapos.recipe.build:download* and slapos.recipe.cmmi recipes.

[feat] Extend Download API to use an alternate URL as fallback
This retries with the alternate URL in case of HTTPError with the main one. Used by slapos.recipe.build:download* and slapos.recipe.cmmi recipes.
958c554f · Julien Muchembled · Xavier Thompson · 7d95ec43 · 958c554f · 958c554f
Commit 958c554f authored Sep 20, 2021 by Julien Muchembled Committed by Xavier Thompson May 03, 2024
3 changed files
--- a/src/zc/buildout/download.py
+++ b/src/zc/buildout/download.py
@@ -20,13 +20,14 @@ except ImportError:

 try:
    # Python 3
+    from urllib.error import HTTPError
    from urllib.request import Request, urlopen
    from urllib.parse import urlparse, urlunparse
 except ImportError:
    # Python 2
    from urlparse import urlparse
    from urlparse import urlunparse
-    from urllib2 import Request, urlopen
+    from urllib2 import Request, urlopen, HTTPError

 from zc.buildout.easy_install import realpath
 from base64 import b64encode
@@ -104,7 +105,7 @@ class Download(object):
        """
        return self.download_cached if self.cache else self.download

-    def download_cached(self, url, md5sum=None, path=None):
+    def download_cached(self, url, md5sum=None, path=None, alternate_url=None):
        """Download a file from a URL using the cache.

        This method assumes that the cache has been configured.
@@ -137,7 +138,7 @@ class Download(object):
            # Don't download directly to cached_path to minimize
            # the probability to alter old data if download fails.
            try:
-                path, is_temp = self.download(url, md5sum, path)
+                path, is_temp = self.download(url, md5sum, path, alternate_url)
            except ChecksumError:
                raise
            except Exception:
@@ -160,11 +161,11 @@ class Download(object):
        else:
            self.logger.debug('Cache miss; will cache %s as %s' %
                              (url, cached_path))
-            self.download(url, md5sum, cached_path)
+            self.download(url, md5sum, cached_path, alternate_url)

        return locate_at(cached_path, path), False

-    def download(self, url, md5sum=None, path=None):
+    def download(self, url, md5sum=None, path=None, alternate_url=None):
        """Download a file from a URL to a given or temporary path.

        An online resource is always downloaded to a temporary file and moved
@@ -193,20 +194,29 @@ class Download(object):
                "Couldn't download %r in offline mode." % url)

        self.logger.info('Downloading %s' % url)
+        download_url = url
        tmp_path = path
        cleanup = True
        try:
            if not path:
                handle, tmp_path = tempfile.mkstemp(prefix='buildout-')
                os.close(handle)
-            tmp_path, headers = self.urlretrieve(url, tmp_path)
+            try:
+                tmp_path, headers = self.urlretrieve(url, tmp_path)
+            except HTTPError:
+                if not alternate_url:
+                    raise
+                self.logger.info('using alternate URL: %s', alternate_url)
+                download_url = alternate_url
+                tmp_path, headers = self.urlretrieve(
+                    alternate_url, tmp_path)
            if not check_md5sum(tmp_path, md5sum):
                raise ChecksumError(
-                    'MD5 checksum mismatch downloading %r' % url)
+                    'MD5 checksum mismatch downloading %r' % download_url)
            cleanup = False
        except IOError as e:
            raise zc.buildout.UserError("Error downloading %s: %s"
-                                        % (url, e))
+                                        % (download_url, e))
        finally:
            if cleanup and tmp_path:
                remove(tmp_path)

--- a/src/zc/buildout/tests/download.txt
+++ b/src/zc/buildout/tests/download.txt
@@ -63,6 +63,32 @@ When trying to access a file that doesn't exist, we'll get an exception:
 ... else: print_('woops')
 download error

+An alternate URL can be used in case of HTTPError with the main one.
+This is useful when a version of a resource can only be downloaded with a
+temporary URL as long as it's the latest version, and this version is then
+moved to a permanent place when a newer version is released. In such a case,
+when using a cache, it's important that the main URL (`url`) is always used
+as the cache key, and `alternate_url` shall be the temporary URL.
+
+>>> path, is_temp = download(server_url+'not-there',
+...                          alternate_url=server_url+'foo.txt')
+>>> cat(path)
+This is a foo text.
+>>> is_temp
+True
+>>> remove(path)
+
+The main URL is tried first:
+
+>>> write(server_data, 'other.txt', 'This is some other text.')
+>>> path, is_temp = download(server_url+'other.txt',
+...                          alternate_url=server_url+'foo.txt')
+>>> cat(path)
+This is some other text.
+>>> is_temp
+True
+>>> remove(path)
+
 Downloading a local file doesn't produce a temporary file but simply returns
 the local file itself:


--- a/src/zc/buildout/tests/extends-cache.txt.disabled
+++ b/src/zc/buildout/tests/extends-cache.txt.disabled
@@ -492,9 +492,9 @@ a better solution would re-use the logging already done by the utility.)

 >>> import zc.buildout
 >>> old_download = zc.buildout.download.Download.download
->>> def wrapper_download(self, url, md5sum=None, path=None):
+>>> def wrapper_download(self, url, *args, **kw):
 ...   print_("The URL %s was downloaded." % url)
-...   return old_download(url, md5sum, path)
+...   return old_download(url, *args, **kw)
 >>> zc.buildout.download.Download.download = wrapper_download

 >>> zc.buildout.buildout.main([])