Commit 289e1cac authored by Nicolas Wavrant's avatar Nicolas Wavrant Committed by Jérome Perrin

ZODB: import patch "incpozo"

Patch submitted here:
https://github.com/zopefoundation/ZODB/pull/403

We backport it in SlapOS directly as the branch 5.8 is not being
developed anymore.
parent 95b1d7df
Pipeline #39848 passed with stage
in 0 seconds
......@@ -60,14 +60,22 @@ egg-versions =
[ZODB5]
<= _ZODB
egg-versions =
ZODB = 5.8.1
ZODB = 5.8.1+SlapOSPatched001
transaction = 4.0.0
patch-binary = ${patch:location}/bin/patch
ZODB-patches =
${:_profile_base_location_}/incpozo-5.8.0.patch#99a9e7889b06ab3fdb1adbd2b152afba
ZODB-patch-options = -p1
[ZODB5:python2]
<= _ZODB
egg-versions =
ZODB = 5.8.1
ZODB = 5.8.1+SlapOSPatched001
transaction = 3.0.1
patch-binary = ${patch:location}/bin/patch
ZODB-patches =
${:_profile_base_location_}/incpozo-5.8.0.patch#99a9e7889b06ab3fdb1adbd2b152afba
ZODB-patch-options = -p1
[ZODB6]
<= _ZODB
......
From 265adfd18f3d5d1c478e63037e37f250604d881d Mon Sep 17 00:00:00 2001
From: Nicolas Wavrant <nicolas.wavrant@nexedi.com>
Date: Fri, 18 Oct 2024 10:21:38 +0900
Subject: [PATCH 1/5] repozo: factorize code doing the actual recover (write),
in preparation for the implementation of the incremental recover
---
src/ZODB/scripts/repozo.py | 71 ++++++++++++++++++++------------------
1 file changed, 37 insertions(+), 34 deletions(-)
diff --git a/src/ZODB/scripts/repozo.py b/src/ZODB/scripts/repozo.py
index 8dce5dc80..32d54f561 100755
--- a/src/ZODB/scripts/repozo.py
+++ b/src/ZODB/scripts/repozo.py
@@ -396,6 +396,42 @@ def func(data):
return bytesread, sum.hexdigest()
+def recover_repofiles(options, repofiles, outfp):
+ if options.withverify:
+ datfile = os.path.splitext(repofiles[0])[0] + '.dat'
+ with open(datfile) as fp:
+ truth_dict = {}
+ for line in fp:
+ fn, startpos, endpos, sum = line.split()
+ startpos = int(startpos)
+ endpos = int(endpos)
+ filename = os.path.join(options.repository,
+ os.path.basename(fn))
+ truth_dict[filename] = {
+ 'size': endpos - startpos,
+ 'sum': sum,
+ }
+ totalsz = 0
+ for repofile in repofiles:
+ reposz, reposum = concat([repofile], outfp)
+ expected_truth = truth_dict[repofile]
+ if reposz != expected_truth['size']:
+ raise VerificationFail(
+ "%s is %d bytes, should be %d bytes" % (
+ repofile, reposz, expected_truth['size']))
+ if reposum != expected_truth['sum']:
+ raise VerificationFail(
+                    "%s has checksum %s instead of %s" % (
+ repofile, reposum, expected_truth['sum']))
+ totalsz += reposz
+ log("Recovered chunk %s : %s bytes, md5: %s",
+ repofile, reposz, reposum)
+ log("Recovered a total of %s bytes", totalsz)
+ else:
+ reposz, reposum = concat(repofiles, outfp)
+ log('Recovered %s bytes, md5: %s', reposz, reposum)
+
+
def gen_filedate(options):
return getattr(options, 'test_now', time.gmtime()[:6])
@@ -698,40 +734,7 @@ def do_recover(options):
files_to_close += (outfp,)
try:
- if options.withverify:
- datfile = os.path.splitext(repofiles[0])[0] + '.dat'
- with open(datfile) as fp:
- truth_dict = {}
- for line in fp:
- fn, startpos, endpos, sum = line.split()
- startpos = int(startpos)
- endpos = int(endpos)
- filename = os.path.join(options.repository,
- os.path.basename(fn))
- truth_dict[filename] = {
- 'size': endpos - startpos,
- 'sum': sum,
- }
- totalsz = 0
- for repofile in repofiles:
- reposz, reposum = concat([repofile], outfp)
- expected_truth = truth_dict[repofile]
- if reposz != expected_truth['size']:
- raise VerificationFail(
- "%s is %d bytes, should be %d bytes" % (
- repofile, reposz, expected_truth['size']))
- if reposum != expected_truth['sum']:
- raise VerificationFail(
- "%s has checksum %s instead of %s" % (
- repofile, reposum, expected_truth['sum']))
- totalsz += reposz
- log("Recovered chunk %s : %s bytes, md5: %s",
- repofile, reposz, reposum)
- log("Recovered a total of %s bytes", totalsz)
- else:
- reposz, reposum = concat(repofiles, outfp)
- log('Recovered %s bytes, md5: %s', reposz, reposum)
-
+ recover_repofiles(options, repofiles, outfp)
if options.output is not None:
last_base = os.path.splitext(repofiles[-1])[0]
source_index = '%s.index' % last_base
From f5eacd0a6116a977f65c1efe5a5eb5945d458fad Mon Sep 17 00:00:00 2001
From: Nicolas Wavrant <nicolas.wavrant@nexedi.com>
Date: Thu, 17 Oct 2024 17:20:03 +0900
Subject: [PATCH 2/5] repozo: support incremental recover
This allows recovering a ZODB filestorage by only appending the missing
chunks after the latest recovered file, instead of always recovering from
zero.
Based on the work of @vpelletier (incpozo).
---
src/ZODB/scripts/repozo.py | 121 +++++++++++---
src/ZODB/scripts/tests/test_repozo.py | 227 +++++++++++++++++++++++++-
3 files changed, 331 insertions(+), 28 deletions(-)
diff --git a/src/ZODB/scripts/repozo.py b/src/ZODB/scripts/repozo.py
index 32d54f561..283cae2a3 100755
--- a/src/ZODB/scripts/repozo.py
+++ b/src/ZODB/scripts/repozo.py
@@ -73,6 +73,12 @@
Note: for the stdout case, the index file will **not** be restored
automatically.
+ -F / --full
+ Force a full recover. By default, an incremental recover is made
+ if possible, by only copying the latest backup delta to the recovered
+ ZODB file. A full recover will always be done if a pack has occured
+ since the last incremental backup.
+
-w
--with-verify
Verify on the fly the backup files on recovering. This option runs
@@ -185,7 +191,7 @@ class Options:
mode = None # BACKUP, RECOVER or VERIFY
file = None # name of input Data.fs file
repository = None # name of directory holding backups
- full = False # True forces full backup
+ full = False # True forces full backup or full recovery
date = None # -D argument, if any
output = None # where to write recovered data; None = stdout
quick = False # -Q flag state
@@ -396,9 +402,8 @@ def func(data):
return bytesread, sum.hexdigest()
-def recover_repofiles(options, repofiles, outfp):
+def recover_repofiles(options, repofiles, datfile, outfp):
if options.withverify:
- datfile = os.path.splitext(repofiles[0])[0] + '.dat'
with open(datfile) as fp:
truth_dict = {}
for line in fp:
@@ -709,15 +714,7 @@ def do_backup(options):
do_full_backup(options)
-def do_recover(options):
- # Find the first full backup at or before the specified date
- repofiles = find_files(options)
- if not repofiles:
- if options.date:
- raise NoFiles('No files in repository before %s' % (options.date,))
- else:
- raise NoFiles('No files in repository')
-
+def do_full_recover(options, repofiles):
files_to_close = ()
if options.output is None:
log('Recovering file to stdout')
@@ -734,17 +731,8 @@ def do_recover(options):
files_to_close += (outfp,)
try:
- recover_repofiles(options, repofiles, outfp)
- if options.output is not None:
- last_base = os.path.splitext(repofiles[-1])[0]
- source_index = '%s.index' % last_base
- target_index = '%s.index' % options.output
- if os.path.exists(source_index):
- log('Restoring index file %s to %s',
- source_index, target_index)
- shutil.copyfile(source_index, target_index)
- else:
- log('No index file to restore: %s', source_index)
+ datfile = os.path.splitext(repofiles[0])[0] + '.dat'
+ recover_repofiles(options, repofiles, datfile, outfp)
finally:
for f in files_to_close:
f.close()
@@ -758,6 +746,93 @@ def do_recover(options):
raise
+def do_incremental_recover(options, repofiles):
+ datfile = os.path.splitext(repofiles[0])[0] + '.dat'
+ log('Recovering (incrementally) file to %s', options.output)
+ with open(options.output, 'r+b') as outfp:
+ outfp.seek(0, 2)
+ initial_length = outfp.tell()
+ with open(datfile) as fp:
+ previous_chunk = None
+ for line in fp:
+ fn, startpos, endpos, _ = chunk = line.split()
+ startpos = int(startpos)
+ endpos = int(endpos)
+ if endpos > initial_length:
+ break
+ previous_chunk = chunk
+
+ if previous_chunk is None:
+ log('Target file smaller than full backup, '
+ 'falling back to a full recover.')
+ return do_full_recover(options, repofiles)
+ if endpos < initial_length:
+ log('Target file is larger than latest backup, '
+ 'falling back to a full recover.')
+ return do_full_recover(options, repofiles)
+ check_startpos = int(previous_chunk[1])
+ check_endpos = int(previous_chunk[2])
+ with open(options.output, 'r+b') as outfp:
+ outfp.seek(check_startpos)
+ check_sum = checksum(outfp, check_endpos - check_startpos)
+ if endpos == initial_length and chunk[3] == check_sum:
+ log('Target file is same size as latest backup, '
+ 'doing nothing.')
+ return
+ elif previous_chunk[3] != check_sum:
+ if endpos == initial_length:
+ log('Target file is not consistent with latest backup, '
+ 'falling back to a full recover.')
+ return do_full_recover(options, repofiles)
+ else:
+ log('Last whole common chunk checksum did not match with backup, '
+ 'falling back to a full recover.')
+ return do_full_recover(options, repofiles)
+
+ filename = os.path.join(options.repository,
+ os.path.basename(fn))
+ first_file_to_restore = repofiles.index(filename)
+ assert first_file_to_restore > 0, (
+ first_file_to_restore, options.repository, fn, filename, repofiles)
+
+ temporary_output_file = options.output + '.part'
+ os.rename(options.output, temporary_output_file)
+ with open(temporary_output_file, 'r+b') as outfp:
+ outfp.seek(startpos)
+ recover_repofiles(options,
+ repofiles[first_file_to_restore:],
+ datfile,
+ outfp)
+ os.rename(temporary_output_file, options.output)
+
+
+def do_recover(options):
+ # Find the first full backup at or before the specified date
+ repofiles = find_files(options)
+ if not repofiles:
+ if options.date:
+ raise NoFiles('No files in repository before %s' % (options.date,))
+ else:
+ raise NoFiles('No files in repository')
+
+ if (options.full or options.output is None
+ or not os.path.exists(options.output)):
+ do_full_recover(options, repofiles)
+ else:
+ do_incremental_recover(options, repofiles)
+
+ if options.output is not None:
+ last_base = os.path.splitext(repofiles[-1])[0]
+ source_index = '%s.index' % last_base
+ target_index = '%s.index' % options.output
+ if os.path.exists(source_index):
+ log('Restoring index file %s to %s',
+ source_index, target_index)
+ shutil.copyfile(source_index, target_index)
+ else:
+ log('No index file to restore: %s', source_index)
+
+
def do_verify(options):
# Verify the sizes and checksums of all files mentioned in the .dat file
repofiles = find_files(options)
diff --git a/src/ZODB/scripts/tests/test_repozo.py b/src/ZODB/scripts/tests/test_repozo.py
index 13d24bef5..eccb4f1c1 100644
--- a/src/ZODB/scripts/tests/test_repozo.py
+++ b/src/ZODB/scripts/tests/test_repozo.py
@@ -864,8 +864,7 @@ def test_w_changes(self):
self.assertEqual(index.maxKey(), db.maxkey)
-class Test_do_recover(OptionsTestBase, unittest.TestCase):
-
+class Mixin_do_recover:
def _callFUT(self, options):
from ZODB.scripts.repozo import do_recover
return do_recover(options)
@@ -902,6 +901,17 @@ def test_no_files_before_explicit_date(self):
files.append(self._makeFile(h, m, s, e))
self.assertRaises(NoFiles, self._callFUT, options)
+
+class Test_do_full_recover(
+ Mixin_do_recover,
+ OptionsTestBase,
+ unittest.TestCase
+):
+ def _makeOptions(self, **kw):
+ options = super()._makeOptions(**kw)
+ options.full = True
+ return options
+
def test_w_full_backup_latest_no_index(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
@@ -1010,6 +1020,216 @@ def test_w_incr_backup_with_verify_size_inconsistent(self):
self.assertTrue(os.path.exists(output + '.part'))
+class Test_do_incremental_recover(
+ Mixin_do_recover,
+ OptionsTestBase,
+ unittest.TestCase
+):
+ def setUp(self):
+ from ZODB.scripts import repozo
+ self._old_verbosity = repozo.VERBOSE
+ self._old_stderr = sys.stderr
+ repozo.VERBOSE = True
+ sys.stderr = StringIO()
+
+ def tearDown(self):
+ from ZODB.scripts import repozo
+ sys.stderr = self._old_stderr
+ repozo.VERBOSE = self._old_verbosity
+
+ def _makeOptions(self, **kw):
+ options = super()._makeOptions(**kw)
+ options.full = False
+ return options
+
+ def _createRecoveredDataFS(self, output, options):
+ self._makeFile(2, 3, 4, '.fs', 'AAA')
+ self._makeFile(4, 5, 6, '.deltafs', 'BBB')
+ self._makeFile(
+ 2, 3, 4, '.dat',
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n') # noqa: E501 line too long
+ self._callFUT(options)
+ self.assertEqual(_read_file(output), b'AAABBB')
+ self.assertFalse(os.path.exists(output + '.part'))
+ return output
+
+ def test_do_nothing(self):
+ import tempfile
+ dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
+ output = os.path.join(dd, 'Data.fs')
+ options = self._makeOptions(date='2010-05-15-13-30-57',
+ output=output,
+ withverify=False)
+ self._createRecoveredDataFS(output, options)
+ self._callFUT(options)
+ self.assertIn(
+ "doing nothing", sys.stderr.getvalue())
+
+ def test_w_incr_recover_from_incr_backup(self):
+ import tempfile
+ dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
+ output = os.path.join(dd, 'Data.fs')
+ options = self._makeOptions(date='2010-05-15-13-30-57',
+ output=output,
+ withverify=False)
+ self._createRecoveredDataFS(output, options)
+ # Create 2 more .deltafs, to prove the code knows where to pick up
+ self._makeFile(6, 7, 8, '.deltafs', 'CCC')
+ self._makeFile(8, 9, 10, '.deltafs', 'DDD')
+ self._makeFile(
+ 2, 3, 4, '.dat',
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long
+ '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n' # noqa: E501 line too long
+ '/backup/2010-05-14-08-09-10.deltafs 9 12 45054f47ac3305a2a33e9bcceadff712\n') # noqa: E501 line too long
+ os.unlink(
+ os.path.join(self._repository_directory,
+ '2010-05-14-04-05-06.deltafs'))
+ self._callFUT(options)
+ self.assertNotIn('falling back to a full recover.',
+ sys.stderr.getvalue())
+ self.assertEqual(_read_file(output), b'AAABBBCCCDDD')
+ self.assertFalse(os.path.exists(output + '.part'))
+
+ def test_w_incr_backup_with_verify_sum_inconsistent(self):
+ import tempfile
+ dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
+ output = os.path.join(dd, 'Data.fs')
+ options = self._makeOptions(date='2010-05-15-13-30-57',
+ output=output,
+ withverify=True)
+ self._createRecoveredDataFS(output, options)
+ self._makeFile(6, 7, 8, '.deltafs', 'CCC')
+ self._makeFile(8, 9, 10, '.deltafs', 'DDD')
+ self._makeFile(
+ 2, 3, 4, '.dat',
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long
+ '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166af\n' # noqa: E501 line too long
+ '/backup/2010-05-14-08-09-10.deltafs 9 12 45054f47ac3305a2a33e9bcceadff712\n') # noqa: E501 line too long
+ from ZODB.scripts.repozo import VerificationFail
+ self.assertRaises(VerificationFail, self._callFUT, options)
+ self.assertNotIn('falling back to a full recover.',
+ sys.stderr.getvalue())
+ self.assertTrue(os.path.exists(output + '.part'))
+
+ def test_w_incr_backup_with_verify_size_inconsistent_too_small(self):
+ import tempfile
+ dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
+ output = os.path.join(dd, 'Data.fs')
+ options = self._makeOptions(date='2010-05-15-13-30-57',
+ output=output,
+ withverify=True)
+ self._createRecoveredDataFS(output, options)
+ self._makeFile(6, 7, 8, '.deltafs', 'CCC')
+ self._makeFile(8, 9, 10, '.deltafs', 'DDD')
+ self._makeFile(
+ 2, 3, 4, '.dat',
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long
+ '/backup/2010-05-14-06-07-08.deltafs 6 8 defb99e69a9f1f6e06f15006b1f166ae\n' # noqa: E501 line too long
+ '/backup/2010-05-14-08-09-10.deltafs 9 12 45054f47ac3305a2a33e9bcceadff712\n') # noqa: E501 line too long
+ from ZODB.scripts.repozo import VerificationFail
+ self.assertRaises(VerificationFail, self._callFUT, options)
+ self.assertNotIn('falling back to a full recover.',
+ sys.stderr.getvalue())
+ self.assertTrue(os.path.exists(output + '.part'))
+
+ def test_w_incr_backup_with_verify_size_inconsistent_too_big(self):
+ import tempfile
+ dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
+ output = os.path.join(dd, 'Data.fs')
+ options = self._makeOptions(date='2010-05-15-13-30-57',
+ output=output,
+ withverify=True)
+ self._createRecoveredDataFS(output, options)
+ self._makeFile(6, 7, 8, '.deltafs', 'CCC')
+ self._makeFile(8, 9, 10, '.deltafs', 'DDD')
+ self._makeFile(
+ 2, 3, 4, '.dat',
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long
+ '/backup/2010-05-14-06-07-08.deltafs 6 10 defb99e69a9f1f6e06f15006b1f166ae\n' # noqa: E501 line too long
+ '/backup/2010-05-14-08-09-10.deltafs 9 12 45054f47ac3305a2a33e9bcceadff712\n') # noqa: E501 line too long
+ from ZODB.scripts.repozo import VerificationFail
+ self.assertRaises(VerificationFail, self._callFUT, options)
+ self.assertNotIn('falling back to a full recover.',
+ sys.stderr.getvalue())
+ self.assertTrue(os.path.exists(output + '.part'))
+
+ def test_w_incr_backup_switch_auto_to_full_recover_if_output_larger_than_dat(self): # noqa: E501 line too long
+ import tempfile
+ dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
+ output = os.path.join(dd, 'Data.fs')
+ options = self._makeOptions(date='2010-05-15-13-30-57',
+ output=output,
+ withverify=False)
+ self._createRecoveredDataFS(output, options)
+ self._makeFile(6, 7, 8, '.deltafs', 'CCC')
+ self._makeFile(
+ 2, 3, 4, '.dat',
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long
+ '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long
+ # The ZODB is longer than announced in the .dat file
+ with open(output, 'r+b') as f:
+ f.write(b'AAABBBCCCDDD')
+ self._callFUT(options)
+ self.assertEqual(_read_file(output), b'AAABBBCCC')
+ self.assertFalse(os.path.exists(output + '.part'))
+ self.assertIn(
+ "falling back to a full recover", sys.stderr.getvalue())
+
+ def test_w_incr_backup_switch_auto_to_full_recover_if_chunk_is_wrong(self): # noqa: E501 line too long
+ import tempfile
+ dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
+ output = os.path.join(dd, 'Data.fs')
+ options = self._makeOptions(date='2010-05-15-13-30-57',
+ output=output,
+ withverify=False)
+ self._createRecoveredDataFS(output, options)
+ self._makeFile(6, 7, 8, '.deltafs', 'CCC')
+ self._makeFile(
+ 2, 3, 4, '.dat',
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a4\n' # noqa: E501 line too long
+ '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long
+ self._callFUT(options)
+ self.assertEqual(_read_file(output), b'AAABBBCCC')
+ self.assertFalse(os.path.exists(output + '.part'))
+ self.assertIn(
+ "Last whole common chunk checksum did not match with backup, falling back to a full recover.", # noqa: E501 line too long
+ sys.stderr.getvalue())
+
+ def test_w_incr_backup_switch_auto_to_full_recover_after_pack(self):
+ import tempfile
+ dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
+ output = os.path.join(dd, 'Data.fs')
+ options = self._makeOptions(date='2010-05-15-13-30-57',
+ output=output,
+ withverify=False)
+ self._makeFile(2, 3, 4, '.fs', 'AAA')
+ self._makeFile(4, 5, 6, '.deltafs', 'BBB')
+ self._makeFile(
+ 2, 3, 4, '.dat',
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n') # noqa: E501 line too long
+ self._callFUT(options)
+ self.assertEqual(_read_file(output), b'AAABBB')
+
+ self._makeFile(6, 7, 8, '.fs', 'CCDD')
+ self._makeFile(
+ 6, 7, 8, '.dat',
+ '/backup/2010-05-14-06-07-08.fs 0 4 dc0ee37408176d839c13f291a4d588de\n') # noqa: E501 line too long
+ self._callFUT(options)
+ self.assertEqual(_read_file(output), b'CCDD')
+ self.assertFalse(os.path.exists(output + '.part'))
+ self.assertIn(
+ 'Target file is larger than latest backup, falling back to a full recover.', # noqa: E501 line too long
+ sys.stderr.getvalue())
+
+
class Test_do_verify(OptionsTestBase, unittest.TestCase):
def _callFUT(self, options):
@@ -1296,7 +1516,8 @@ def test_suite():
unittest.makeSuite(Test_do_full_backup),
unittest.makeSuite(Test_do_incremental_backup),
# unittest.makeSuite(Test_do_backup), #TODO
- unittest.makeSuite(Test_do_recover),
+ unittest.makeSuite(Test_do_full_recover),
+ unittest.makeSuite(Test_do_incremental_recover),
unittest.makeSuite(Test_do_verify),
# N.B.: this test take forever to run (~40sec on a fast laptop),
# *and* it is non-deterministic.
From c21a6df92544302d971594c893452224c3e71279 Mon Sep 17 00:00:00 2001
From: Nicolas Wavrant <nicolas.wavrant@nexedi.com>
Date: Thu, 14 Nov 2024 16:17:42 +0900
Subject: [PATCH 3/5] repozo: integrate a quick option for the incremental
recover
The default behavior becomes to check the integrity of the existing
recovered Data.fs before incrementally restoring onto it. The quick option
allows verifying only the latest chunk previously recovered on the
Data.fs before restoring incrementally. This saves many reads.
---
src/ZODB/scripts/repozo.py | 59 +++++++++++-------
src/ZODB/scripts/tests/test_repozo.py | 87 +++++++++++++++++++++++++--
2 files changed, 120 insertions(+), 26 deletions(-)
diff --git a/src/ZODB/scripts/repozo.py b/src/ZODB/scripts/repozo.py
index 283cae2a3..8240bebc7 100755
--- a/src/ZODB/scripts/repozo.py
+++ b/src/ZODB/scripts/repozo.py
@@ -79,6 +79,13 @@
ZODB file. A full recover will always be done if a pack has occured
since the last incremental backup.
+ -Q / --quick
+        Verify via md5 checksum only the last incrementally recovered chunk
+        of the output file. This reduces the disk i/o at the (theoretical) cost of
+ inconsistency. This is a probabilistic way of determining whether a
+ full recover is necessary. This argument is ignored when -F / --full
+ is used.
+
-w
--with-verify
Verify on the fly the backup files on recovering. This option runs
@@ -267,6 +274,9 @@ class Options:
if options.killold:
log('--kill-old-on-full option is ignored in recover mode')
options.killold = False
+ if options.full and options.quick:
+ log('--quick option is ignored if --full option is used')
+ options.quick = None
else:
assert options.mode == VERIFY
if options.date is not None:
@@ -752,42 +762,49 @@ def do_incremental_recover(options, repofiles):
with open(options.output, 'r+b') as outfp:
outfp.seek(0, 2)
initial_length = outfp.tell()
- with open(datfile) as fp:
- previous_chunk = None
+
+ error = ''
+ previous_chunk = None
+ with open(datfile) as fp, open(options.output, 'r+b') as outfp:
for line in fp:
- fn, startpos, endpos, _ = chunk = line.split()
+ fn, startpos, endpos, check_sum = chunk = line.split()
startpos = int(startpos)
endpos = int(endpos)
if endpos > initial_length:
break
+ if not options.quick:
+ if check_sum != checksum(outfp, endpos - startpos):
+ error = ('Target file is not consistent with backup %s, '
+ 'falling back to a full recover.') % fn
+ break
previous_chunk = chunk
-
- if previous_chunk is None:
+ if error:
+ log(error)
+ return do_full_recover(options, repofiles)
+ elif previous_chunk is None:
log('Target file smaller than full backup, '
'falling back to a full recover.')
return do_full_recover(options, repofiles)
- if endpos < initial_length:
+ elif endpos < initial_length:
log('Target file is larger than latest backup, '
'falling back to a full recover.')
return do_full_recover(options, repofiles)
- check_startpos = int(previous_chunk[1])
- check_endpos = int(previous_chunk[2])
- with open(options.output, 'r+b') as outfp:
- outfp.seek(check_startpos)
- check_sum = checksum(outfp, check_endpos - check_startpos)
- if endpos == initial_length and chunk[3] == check_sum:
+ if options.quick:
+ check_startpos = int(previous_chunk[1])
+ check_endpos = int(previous_chunk[2])
+ with open(options.output, 'r+b') as outfp:
+ outfp.seek(check_startpos)
+ if previous_chunk[3] != checksum(
+ outfp, check_endpos - check_startpos):
+ error = ('Target file is not consistent with backup %s, '
+ 'falling back to a full recover.' % previous_chunk[0])
+ if error:
+ log(error)
+ return do_full_recover(options, repofiles)
+ if endpos == initial_length:
log('Target file is same size as latest backup, '
'doing nothing.')
return
- elif previous_chunk[3] != check_sum:
- if endpos == initial_length:
- log('Target file is not consistent with latest backup, '
- 'falling back to a full recover.')
- return do_full_recover(options, repofiles)
- else:
- log('Last whole common chunk checksum did not match with backup, '
- 'falling back to a full recover.')
- return do_full_recover(options, repofiles)
filename = os.path.join(options.repository,
os.path.basename(fn))
diff --git a/src/ZODB/scripts/tests/test_repozo.py b/src/ZODB/scripts/tests/test_repozo.py
index eccb4f1c1..9d5696e72 100644
--- a/src/ZODB/scripts/tests/test_repozo.py
+++ b/src/ZODB/scripts/tests/test_repozo.py
@@ -220,13 +220,16 @@ def test_recover_ignored_args(self):
from ZODB.scripts import repozo
options = repozo.parseargs(['-R', '-r', '/tmp/nosuchdir', '-v',
'-f', '/tmp/ignored.fs',
- '-k'])
+ '-k', '--full', '--quick'])
self.assertEqual(options.file, None)
self.assertIn('--file option is ignored in recover mode',
sys.stderr.getvalue())
self.assertEqual(options.killold, False)
self.assertIn('--kill-old-on-full option is ignored in recover mode',
sys.stderr.getvalue())
+ self.assertEqual(options.quick, None)
+ self.assertIn('--quick option is ignored if --full option is used',
+ sys.stderr.getvalue())
def test_verify_ignored_args(self):
from ZODB.scripts import repozo
@@ -1040,6 +1043,7 @@ def tearDown(self):
def _makeOptions(self, **kw):
options = super()._makeOptions(**kw)
options.full = False
+ options.quick = kw.get('quick', False)
return options
def _createRecoveredDataFS(self, output, options):
@@ -1092,6 +1096,33 @@ def test_w_incr_recover_from_incr_backup(self):
self.assertEqual(_read_file(output), b'AAABBBCCCDDD')
self.assertFalse(os.path.exists(output + '.part'))
+ def test_w_quick_incr_recover_from_incr_backup(self):
+ import tempfile
+ dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
+ output = os.path.join(dd, 'Data.fs')
+ options = self._makeOptions(date='2010-05-15-13-30-57',
+ output=output,
+ quick=True,
+ withverify=False)
+ self._createRecoveredDataFS(output, options)
+ # Create 2 more .deltafs, to prove the code knows where to pick up
+ self._makeFile(6, 7, 8, '.deltafs', 'CCC')
+ self._makeFile(8, 9, 10, '.deltafs', 'DDD')
+ self._makeFile(
+ 2, 3, 4, '.dat',
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long
+ '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n' # noqa: E501 line too long
+ '/backup/2010-05-14-08-09-10.deltafs 9 12 45054f47ac3305a2a33e9bcceadff712\n') # noqa: E501 line too long
+ os.unlink(
+ os.path.join(self._repository_directory,
+ '2010-05-14-04-05-06.deltafs'))
+ self._callFUT(options)
+ self.assertNotIn('falling back to a full recover.',
+ sys.stderr.getvalue())
+ self.assertEqual(_read_file(output), b'AAABBBCCCDDD')
+ self.assertFalse(os.path.exists(output + '.part'))
+
def test_w_incr_backup_with_verify_sum_inconsistent(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
@@ -1192,14 +1223,14 @@ def test_w_incr_backup_switch_auto_to_full_recover_if_chunk_is_wrong(self): # n
self._makeFile(6, 7, 8, '.deltafs', 'CCC')
self._makeFile(
2, 3, 4, '.dat',
- '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
- '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a4\n' # noqa: E501 line too long
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b8\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long
'/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long
self._callFUT(options)
self.assertEqual(_read_file(output), b'AAABBBCCC')
self.assertFalse(os.path.exists(output + '.part'))
self.assertIn(
- "Last whole common chunk checksum did not match with backup, falling back to a full recover.", # noqa: E501 line too long
+ "Target file is not consistent with backup /backup/2010-05-14-02-03-04.fs, falling back to a full recover.", # noqa: E501 line too long
sys.stderr.getvalue())
def test_w_incr_backup_switch_auto_to_full_recover_after_pack(self):
@@ -1226,9 +1257,55 @@ def test_w_incr_backup_switch_auto_to_full_recover_after_pack(self):
self.assertEqual(_read_file(output), b'CCDD')
self.assertFalse(os.path.exists(output + '.part'))
self.assertIn(
- 'Target file is larger than latest backup, falling back to a full recover.', # noqa: E501 line too long
+ "Target file is not consistent with backup /backup/2010-05-14-06-07-08.fs, falling back to a full recover.", # noqa: E501 line too long
sys.stderr.getvalue())
+ def test_w_quick_incr_backup_switch_auto_to_full_recover_if_last_chunk_is_wrong(self): # noqa: E501 line too long
+ import tempfile
+ dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
+ output = os.path.join(dd, 'Data.fs')
+ options = self._makeOptions(date='2010-05-15-13-30-57',
+ output=output,
+ quick=True,
+ withverify=False)
+ self._createRecoveredDataFS(output, options)
+ self._makeFile(6, 7, 8, '.deltafs', 'CCC')
+ self._makeFile(
+ 2, 3, 4, '.dat',
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a4\n' # noqa: E501 line too long
+ '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long
+ self._callFUT(options)
+ self.assertEqual(_read_file(output), b'AAABBBCCC')
+ self.assertFalse(os.path.exists(output + '.part'))
+ self.assertIn(
+ "Target file is not consistent with backup /backup/2010-05-14-04-05-06.deltafs, falling back to a full recover.", # noqa: E501 line too long
+ sys.stderr.getvalue())
+
+ def test_w_quick_incr_backup_dont_see_old_inconsistencies(self):
+ import tempfile
+ dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
+ output = os.path.join(dd, 'Data.fs')
+ options = self._makeOptions(date='2010-05-15-13-30-57',
+ output=output,
+ quick=True,
+ withverify=False)
+ self._createRecoveredDataFS(output, options)
+ self._makeFile(6, 7, 8, '.deltafs', 'CCC')
+ self._makeFile(
+ 2, 3, 4, '.dat',
+ '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
+ '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long
+ '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long
+ # The ZODB is longer than announced in the .dat file
+ with open(output, 'r+b') as f:
+ f.write(b'ZZZBBBCCC')
+ self._callFUT(options)
+ self.assertEqual(_read_file(output), b'ZZZBBBCCC')
+ self.assertFalse(os.path.exists(output + '.part'))
+ self.assertNotIn(
+ "falling back to a full recover", sys.stderr.getvalue())
+
class Test_do_verify(OptionsTestBase, unittest.TestCase):
From 93a1e0653873067db46cc675c548750310189576 Mon Sep 17 00:00:00 2001
From: Nicolas Wavrant <nicolas.wavrant@nexedi.com>
Date: Fri, 15 Nov 2024 16:25:09 +0900
Subject: [PATCH 4/5] repozo: be more explicit on the FileStorage size
retrieval methodology
---
src/ZODB/scripts/repozo.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/ZODB/scripts/repozo.py b/src/ZODB/scripts/repozo.py
index 8240bebc7..aaf5620a6 100755
--- a/src/ZODB/scripts/repozo.py
+++ b/src/ZODB/scripts/repozo.py
@@ -760,7 +760,10 @@ def do_incremental_recover(options, repofiles):
datfile = os.path.splitext(repofiles[0])[0] + '.dat'
log('Recovering (incrementally) file to %s', options.output)
with open(options.output, 'r+b') as outfp:
- outfp.seek(0, 2)
+ # Note that we do not open the FileStorage to use getSize here,
+ # we really want the actual file size, even if there is invalid
+ # transaction data at the end.
+ outfp.seek(0, os.SEEK_END)
initial_length = outfp.tell()
error = ''
From 3b4abd939171fc5af92de3016f66ce20f55e5a2f Mon Sep 17 00:00:00 2001
From: Nicolas Wavrant <nicolas.wavrant@nexedi.com>
Date: Fri, 15 Nov 2024 16:53:04 +0900
Subject: [PATCH 5/5] repozo: log files that will be used by the incremental
recovery
As find_files logs all the files needed to recover the state from 0,
add a log to show which files will be effectively used in the case
of an incremental recovery.
---
src/ZODB/scripts/repozo.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/ZODB/scripts/repozo.py b/src/ZODB/scripts/repozo.py
index aaf5620a6..015418c12 100755
--- a/src/ZODB/scripts/repozo.py
+++ b/src/ZODB/scripts/repozo.py
@@ -814,6 +814,9 @@ def do_incremental_recover(options, repofiles):
first_file_to_restore = repofiles.index(filename)
assert first_file_to_restore > 0, (
first_file_to_restore, options.repository, fn, filename, repofiles)
+ log('remaining files needed to recover incrementally onto target file:')
+ for f in repofiles[first_file_to_restore:]:
+ log('\t%s', f)
temporary_output_file = options.output + '.part'
os.rename(options.output, temporary_output_file)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment