Port zodbtools to py3

The penultimate patch needs `bstr` from pygolang to work correctly (see pygolang@c9648c44), but it does no harm to merge this without waiting for the pygolang bits: without bstr zodbtools continues to work fine on py2, and it is only py3 mode that will not fully work. Previous discussions and py3 porting attempts: - nexedi/zodbtools!8 (comment 73726) - nexedi/zodbtools!12 - conversation from nexedi/zodbtools!13 (comment 81553) to nexedi/zodbtools!13 (comment 81874) - nexedi/zodbtools!19 (comment 129023) - 42799cf6 (comment 166403) /reviewed-by @jerome /reviewed-on nexedi/zodbtools!23

Port zodbtools to py3
The penultimate patch needs `bstr` from pygolang to work correctly (see pygolang@c9648c44), but it does no harm to merge this without waiting for the pygolang bits: without bstr zodbtools continues to work fine on py2, and it is only py3 mode that will not fully work. Previous discussions and py3 porting attempts: - nexedi/zodbtools!8 (comment 73726) - nexedi/zodbtools!12 - conversation from nexedi/zodbtools!13 (comment 81553) to nexedi/zodbtools!13 (comment 81874) - nexedi/zodbtools!19 (comment 129023) - 42799cf6 (comment 166403) /reviewed-by @jerome /reviewed-on nexedi/zodbtools!23
7ae5ff82 · Kirill Smelkov · 80559a94 · 65ebbe7b · 7ae5ff82 · 7ae5ff82
Commit 7ae5ff82 authored Sep 08, 2022 by Kirill Smelkov
9 changed files
--- a/zodbtools/test/test_analyze.py
+++ b/zodbtools/test/test_analyze.py
 # -*- coding: utf-8 -*-
-# Copyright (C) 2019 Nexedi SA and Contributors.
+# Copyright (C) 2019-2022 Nexedi SA and Contributors.
 #
 # This program is free software: you can Use, Study, Modify and Redistribute
 # it under the terms of the GNU General Public License version 3, or (at your
@@ -18,14 +18,20 @@
 # See https://www.nexedi.com/licensing for rationale and options.

 from zodbtools.zodbanalyze import analyze, report
+from zodbtools.test.testutil import fs1_testdata_py23
+from zodbtools.util import fromhex
 import os.path
+from golang import b


-def test_zodbanalyze(capsys):
+def test_zodbanalyze(tmpdir, capsys):
+    tfs1 = fs1_testdata_py23(tmpdir,
+                    os.path.join(os.path.dirname(__file__), "testdata", "1.fs"))
+
    for use_dbm in (False, True):
        report(
            analyze(
-                os.path.join(os.path.dirname(__file__), "testdata", "1.fs"),
+                tfs1,
                use_dbm=use_dbm,
                delta_fs=False,
                tidmin=None,
@@ -40,7 +46,7 @@ def test_zodbanalyze(capsys):
    # csv output
    report(
        analyze(
-            os.path.join(os.path.dirname(__file__), "testdata", "1.fs"),
+            tfs1,
            use_dbm=False,
            delta_fs=False,
            tidmin=None,
@@ -61,14 +67,14 @@ __main__.Object,56,1880,54.366686%,33.571429,9,303,47,1577
    # empty range
    report(
        analyze(
-            os.path.join(os.path.dirname(__file__), "testdata", "1.fs"),
+            tfs1,
            use_dbm=False,
            delta_fs=False,
-            tidmin="ffffffffffffffff",
+            tidmin=fromhex("ffffffffffffffff"),
            tidmax=None,
        ),
        csv=False,
    )
    captured = capsys.readouterr()
-    assert "# ø\nNo transactions processed\n" == captured.out.encode('utf-8')
+    assert "# ø\nNo transactions processed\n" == b(captured.out)
    assert captured.err == ""
--- a/zodbtools/test/test_commit.py
+++ b/zodbtools/test/test_commit.py
 # -*- coding: utf-8 -*-
-# Copyright (C) 2018-2020  Nexedi SA and Contributors.
+# Copyright (C) 2018-2022  Nexedi SA and Contributors.
 #                          Kirill Smelkov <kirr@nexedi.com>
 #                          Jérome Perrin <jerome@nexedi.com>
 #
@@ -27,7 +27,7 @@ from ZODB._compat import BytesIO, dumps, _protocol   # XXX can't yet commit with

 from tempfile import mkdtemp
 from shutil import rmtree
-from golang import func, defer
+from golang import func, defer, b

 # verify zodbcommit.
 @func
@@ -43,8 +43,8 @@ def test_zodbcommit(zext):
    # commit some transactions via zodbcommit and verify if storage dump gives
    # what is expected.
    t1 = Transaction(z64, ' ', b'user name', b'description ...', zext(dumps({'a': 'b'}, _protocol)), [
-        ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')),
-        ObjectData(p64(2), b'data2', 'sha1', sha1(b'data2'))])
+        ObjectData(p64(1), b'data1', b('sha1'), sha1(b'data1')),
+        ObjectData(p64(2), b'data2', b('sha1'), sha1(b'data2'))])

    t1.tid = zodbcommit(stor, head, t1)


--- a/zodbtools/test/test_dump.py
+++ b/zodbtools/test/test_dump.py
@@ -30,15 +30,16 @@ from io import BytesIO

 from os.path import dirname

-from zodbtools.test.testutil import zext_supported
+from zodbtools.test.testutil import zext_supported, fs1_testdata_py23
 from pytest import mark, raises, xfail

 # verify zodbdump output against golden
 @mark.parametrize('pretty', ('raw', 'zpickledis'))
-def test_zodbdump(zext, pretty):
+def test_zodbdump(tmpdir, zext, pretty):
    tdir  = dirname(__file__)
    zkind = '_!zext' if zext.disabled else ''
-    stor  = FileStorage('%s/testdata/1%s.fs' % (tdir, zkind), read_only=True)
+    tfs1  = fs1_testdata_py23(tmpdir, '%s/testdata/1%s.fs' % (tdir, zkind))
+    stor  = FileStorage(tfs1, read_only=True)

    with open('%s/testdata/1%s.zdump.%s.ok' % (tdir, zkind, pretty), 'rb') as f:
        dumpok = f.read()

--- a/zodbtools/test/test_restore.py
+++ b/zodbtools/test/test_restore.py
@@ -21,7 +21,8 @@
 from __future__ import print_function

 from zodbtools.zodbrestore import zodbrestore
-from zodbtools.util import storageFromURL
+from zodbtools.util import storageFromURL, readfile
+from zodbtools.test.testutil import fs1_testdata_py23

 from os.path import dirname
 from tempfile import mkdtemp
@@ -30,9 +31,7 @@ from golang import func, defer

 # verify zodbrestore.
 @func
-def test_zodbrestore(zext):
-    tmpd = mkdtemp('', 'zodbrestore.')
-    defer(lambda: rmtree(tmpd))
+def test_zodbrestore(tmpdir, zext):
    zkind = '_!zext' if zext.disabled else ''

    # restore from testdata/1.zdump.ok and verify it gives result that is
@@ -43,18 +42,12 @@ def test_zodbrestore(zext):
        zdump = open("%s/1%s.zdump.raw.ok" % (tdata, zkind), 'rb')
        defer(zdump.close)

-        stor = storageFromURL('%s/2.fs' % tmpd)
+        stor = storageFromURL('%s/2.fs' % tmpdir)
        defer(stor.close)

        zodbrestore(stor, zdump)
    _()

-    zfs1 = _readfile("%s/1%s.fs" % (tdata, zkind))
-    zfs2 = _readfile("%s/2.fs" % tmpd)
+    zfs1 = readfile(fs1_testdata_py23(tmpdir, "%s/1%s.fs" % (tdata, zkind)))
+    zfs2 = readfile("%s/2.fs" % tmpdir)
    assert zfs1 == zfs2
-
-
-# _readfile reads file at path.
-def _readfile(path): # -> data(bytes)
-    with open(path, 'rb') as _:
-        return _.read()
--- a/zodbtools/test/testutil.py
+++ b/zodbtools/test/testutil.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2019  Nexedi SA and Contributors.
-#                     Kirill Smelkov <kirr@nexedi.com>
+# Copyright (C) 2019-2022  Nexedi SA and Contributors.
+#                          Kirill Smelkov <kirr@nexedi.com>
 #
 # This program is free software: you can Use, Study, Modify and Redistribute
 # it under the terms of the GNU General Public License version 3, or (at your
@@ -27,6 +27,10 @@ import transaction
 from tempfile import mkdtemp
 from shutil import rmtree
 from golang import func, defer
+from six import PY3
+from os.path import basename
+
+from zodbtools.util import readfile, writefile

 # zext_supported checks whether ZODB supports txn.extension_bytes .
 _zext_supported_memo = None
@@ -61,3 +65,19 @@ def _zext_supported():

    assert last_txn.extension == {'a': 'b'}
    return hasattr(last_txn, 'extension_bytes')
+
+
+# fs1_testdata_py23 prepares and returns path to temporary FileStorage prepared
+# from testdata with header adjusted to work on current Python.
+def fs1_testdata_py23(tmpdir, path):
+    data  = readfile(path)
+    index = readfile(path + ".index")
+    assert data[:4] == b"FS21"      # FileStorage magic for Python2
+    if PY3:
+        data = b"FS30" + data[4:]   # FileStorage magic for Python3
+
+    path_ = "%s/%s" % (tmpdir, basename(path))
+
+    writefile(path_, data)
+    writefile("%s.index" % path_, index)
+    return path_
--- a/zodbtools/util.py
+++ b/zodbtools/util.py
 # -*- coding: utf-8 -*-
 # zodbtools - various utility routines
-# Copyright (C) 2016-2019  Nexedi SA and Contributors.
+# Copyright (C) 2016-2022  Nexedi SA and Contributors.
 #                          Kirill Smelkov <kirr@nexedi.com>
 #                          Jérome Perrin <jerome@nexedi.com>
 #
@@ -27,9 +27,12 @@ from zlib import crc32, adler32
 from ZODB.TimeStamp import TimeStamp
 import dateparser

+from golang import b
+
+
 def ashex(s):
-    # type: (bytes) -> bytes
-    return codecs.encode(s, 'hex')
+    # type: (bytes) -> bstr
+    return b(codecs.encode(s, 'hex'))

 def fromhex(s):
    # type: (Union[str,bytes]) -> bytes
@@ -235,3 +238,15 @@ def asbinstream(stream):
    if isinstance(stream, io.TextIOBase):
        return stream.buffer
    return stream
+
+
+# readfile reads file at path.
+def readfile(path): # -> data(bytes)
+    with open(path, 'rb') as _:
+        return _.read()
+
+# writefile writes data to file at path.
+# if the file already existed, its old data is erased.
+def writefile(path, data):
+    with open(path, 'wb') as _:
+        _.write(data)
--- a/zodbtools/zodbanalyze.py
+++ b/zodbtools/zodbanalyze.py
@@ -16,7 +16,7 @@ from ZODB.FileStorage import FileIterator, packed_version
 from ZODB.FileStorage.format import FileStorageFormatter
 from ZODB.utils import get_pickle_metadata
 from zodbtools.util import storageFromURL, parse_tidrange, ashex
-from golang import func, defer
+from golang import func, defer, b

 class DeltaFileStorage(
    FileStorageFormatter,
@@ -225,7 +225,7 @@ def analyze_rec(report, record):
                report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1
                report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size
            else:
-                type = report.OIDMAP[oid]
+                type = b(report.OIDMAP[oid])
                if report.use_dbm:
                    fsize = int(report.USEDMAP[oid])
                    report.USEDMAP[oid] = str(size)

--- a/zodbtools/zodbcommit.py
+++ b/zodbtools/zodbcommit.py
-# Copyright (C) 2018-2021  Nexedi SA and Contributors.
+# Copyright (C) 2018-2022  Nexedi SA and Contributors.
 #                          Kirill Smelkov <kirr@nexedi.com>
 #
 # This program is free software: you can Use, Study, Modify and Redistribute
@@ -40,12 +40,12 @@ can query current database head (last_tid) with `zodb info <stor> last_tid`.

 from __future__ import print_function
 from zodbtools import zodbdump
-from zodbtools.util import ashex, fromhex, storageFromURL
+from zodbtools.util import ashex, fromhex, storageFromURL, asbinstream
 from ZODB.interfaces import IStorageRestoreable
 from ZODB.utils import p64, u64, z64
 from ZODB.POSException import POSKeyError
 from ZODB._compat import BytesIO
-from golang import func, defer, panic
+from golang import func, defer, panic, b
 import warnings


@@ -217,9 +217,9 @@ def main(argv):
    defer(stor.close)

    # artificial transaction header with tid=0 to request regular commit
-    zin = b'txn 0000000000000000 " "\n'
+    zin = b('txn 0000000000000000 " "\n')

-    zin += sys.stdin.read()
+    zin += asbinstream(sys.stdin).read()
    zin = BytesIO(zin)
    zr = zodbdump.DumpReader(zin)
    zr.lineno -= 1                      # we prepended txn header

--- a/zodbtools/zodbdump.py
+++ b/zodbtools/zodbdump.py
@@ -76,6 +76,7 @@ import logging as log
 import re
 from golang.gcompat import qq
 from golang import func, defer, strconv, b
+from six import StringIO  # io.StringIO does not accept non-unicode strings on py2

 # txn_raw_extension returns raw extension from txn metadata
 def txn_raw_extension(stor, txn):
@@ -122,9 +123,9 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw', out=asbinstream
            else:
                out.write(b"extension\n")
                extf = BytesIO(rawext)
-                disf = BytesIO()
+                disf = StringIO()
                pickletools.dis(extf, disf)
-                out.write(indent(disf.getvalue(), "  "))
+                out.write(b(indent(disf.getvalue(), "  ")))
                extra = extf.read()
                if len(extra) > 0:
                    out.write(b"  + extra data %s\n" % qq(extra))
@@ -161,10 +162,10 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw', out=asbinstream
                    elif pretty == 'zpickledis':
                        # https://github.com/zopefoundation/ZODB/blob/5.6.0-55-g1226c9d35/src/ZODB/serialize.py#L24-L29
                        dataf = BytesIO(obj.data)
-                        disf  = BytesIO()
+                        disf  = StringIO()
                        pickletools.dis(dataf, disf) # class
                        pickletools.dis(dataf, disf) # state
-                        out.write(indent(disf.getvalue(), "  "))
+                        out.write(b(indent(disf.getvalue(), "  ")))
                        extra = dataf.read()
                        if len(extra) > 0:
                            out.write(b"  + extra data %s\n" % qq(extra))
@@ -432,7 +433,7 @@ class DumpReader(object):

            else:
                size     = int(m.group('size'))
-                hashfunc = m.group('hashfunc')
+                hashfunc = b(m.group('hashfunc'))
                hashok   = fromhex(m.group('hash'))
                hashonly = m.group('hashonly') is not None
                data     = None # see vvv
@@ -550,7 +551,7 @@ class ObjectCopy(Object):
 # ObjectData represents record with object data.
 class ObjectData(Object):
    # .data         HashOnly | bytes
-    # .hashfunc     str             hash function used for integrity
+    # .hashfunc     bstr            hash function used for integrity
    # .hash_        bytes           hash of the object's data
    def __init__(self, oid, data, hashfunc, hash_):
        super(ObjectData, self).__init__(oid)