• Kirill Smelkov's avatar
    *: Fix working on py3 by using bstr bytestring instead of raw bytes · 9861c136
    Kirill Smelkov authored
    e.g. for ObjectData .hashfunc:
    
    In many contexts we need .hashfunc to behave like a string, e.g. for
    accessing hashRegistry by keys. In many other contexts - e.g. when
    zodbdump input is parsed or emitted - it is handier to handle it like
    raw bytes.
    
    If we let .hashfunc be of type str - it breaks the second mode. If we
    let it be of type bytes - it breaks the first mode.
    
    Also, in many places it is hard to constantly encode/decode between str
    and bytes, especially where an object is sometimes used in a string
    context and sometimes in a binary context.
    
    -> Fix it all in one go by using bytestring type from pygolang,
    which provides both unicode string and binary semantics simultaneously.
    
    This needs bstr from pygolang (see kirr/pygolang@c9648c44),
    but even if pygolang comes without bstr, with this patch zodbtools
    continues to work ok on py2 - only py3 mode won't work.
    
    The list of test failures before this patch is provided below:
    
        _______________________________ test_zodbanalyze _______________________________
    
        tmpdir = local('/tmp/pytest-of-kirr/pytest-22/test_zodbanalyze0')
        capsys = <_pytest.capture.CaptureFixture object at 0x7f3de6835c70>
    
            def test_zodbanalyze(tmpdir, capsys):
                tfs1 = fs1_testdata_py23(tmpdir,
                                os.path.join(os.path.dirname(__file__), "testdata", "1.fs"))
    
                for use_dbm in (False, True):
        >           report(
                        analyze(
                            tfs1,
                            use_dbm=use_dbm,
                            delta_fs=False,
                            tidmin=None,
                            tidmax=None,
                        ),
                        csv=False,
                    )
    
        zodbtools/test/test_analyze.py:30:
        _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    
        rep = <zodbtools.zodbanalyze.Report object at 0x7f3de5e16b20>, csv = False
    
            def report(rep, csv=False):
                ...
                        print (fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t],
                                       pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t],
        >                              rep.COIDSMAP[t], rep.CBYTESMAP[t],
                                       rep.FOIDSMAP.get(t, 0), rep.FBYTESMAP.get(t, 0)))
        E               KeyError: b'persistent.mapping.PersistentMapping'
    
        zodbtools/zodbanalyze.py:147: KeyError
    
        ____________________________ test_zodbcommit[!zext] ____________________________
    
        zext = <function zext.<locals>._ at 0x7f3deb5c3e50>
    
            @func
            def test_zodbcommit(zext):
                tmpd = mkdtemp('', 'zodbcommit.')
                defer(lambda: rmtree(tmpd))
    
                stor = storageFromURL('%s/2.fs' % tmpd)
                defer(stor.close)
    
                head = stor.lastTransaction()
    
                # commit some transactions via zodbcommit and verify if storage dump gives
                # what is expected.
                t1 = Transaction(z64, ' ', b'user name', b'description ...', zext(dumps({'a': 'b'}, _protocol)), [
                    ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')),
                    ObjectData(p64(2), b'data2', 'sha1', sha1(b'data2'))])
    
                t1.tid = zodbcommit(stor, head, t1)
    
                t2 = Transaction(z64, ' ', b'user2', b'desc2', b'', [
                    ObjectDelete(p64(2))])
    
                t2.tid = zodbcommit(stor, t1.tid, t2)
    
                buf = BytesIO()
                zodbdump(stor, p64(u64(head)+1), None, out=buf)
                dumped = buf.getvalue()
    
        >       assert dumped == b''.join([_.zdump() for _ in (t1, t2)])
    
        zodbtools/test/test_commit.py:61:
        _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
        zodbtools/test/test_commit.py:61: in <listcomp>
            assert dumped == b''.join([_.zdump() for _ in (t1, t2)])
        zodbtools/zodbdump.py:521: in zdump
            z += obj.zdump()
        _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    
        self = <zodbtools.zodbdump.ObjectData object at 0x7f3de5d26d90>
    
            def zdump(self):
                data = self.data
                hashonly = isinstance(data, HashOnly)
                if hashonly:
                    size = data.size
                else:
                    size = len(data)
        >       z = b'obj %s %d %s:%s' % (ashex(self.oid), size, self.hashfunc, ashex(self.hash_))
        E       TypeError: %b requires a bytes-like object, or an object that implements __bytes__, not 'str'
    
        zodbtools/zodbdump.py:569: TypeError
    
        _______________________________ test_dumpreader ________________________________
    
            def test_dumpreader():
                in_ = b"""\
            txn 0123456789abcdef " "
            user "my name"
            description "o la-la..."
            extension "zzz123 def"
            obj 0000000000000001 delete
            obj 0000000000000002 from 0123456789abcdee
            obj 0000000000000003 54 adler32:01234567 -
            obj 0000000000000004 4 sha1:9865d483bc5a94f2e30056fc256ed3066af54d04
            ZZZZ
            obj 0000000000000005 9 crc32:52fdeac5
            ABC
    
            DEF!
    
            txn 0123456789abcdf0 " "
            user "author2"
            description "zzz"
            extension "qqq"
    
            """
    
                r = DumpReader(BytesIO(in_))
        >       t1 = r.readtxn()
    
        zodbtools/test/test_dump.py:78:
        _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
        zodbtools/zodbdump.py:443: in readtxn
            self._badline('unknown hash function %s' % qq(hashfunc))
        _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    
        self = <zodbtools.zodbdump.DumpReader object at 0x7f3de5d69cd0>
        msg = 'unknown hash function "adler32"'
    
            def _badline(self, msg):
        >       raise RuntimeError("%s+%d: invalid line: %s (%s)" % (_ioname(self._r), self.lineno, msg, qq(self._line)))
        E       RuntimeError: +7: invalid line: unknown hash function "adler32" ("obj 0000000000000003 54 adler32:01234567 -")
    
        zodbtools/zodbdump.py:382: RuntimeError
    
        ___________________________ test_zodbrestore[!zext] ____________________________
    
        tmpdir = local('/tmp/pytest-of-kirr/pytest-22/test_zodbrestore__zext_0')
        zext = <function zext.<locals>._ at 0x7f3de5d6ddc0>
    
            @func
            def test_zodbrestore(tmpdir, zext):
                zkind = '_!zext' if zext.disabled else ''
    
                # restore from testdata/1.zdump.ok and verify it gives result that is
                # bit-to-bit identical to testdata/1.fs
                tdata = dirname(__file__) + "/testdata"
                @func
                def _():
                    zdump = open("%s/1%s.zdump.raw.ok" % (tdata, zkind), 'rb')
                    defer(zdump.close)
    
                    stor = storageFromURL('%s/2.fs' % tmpdir)
                    defer(stor.close)
    
                    zodbrestore(stor, zdump)
        >       _()
    
        zodbtools/test/test_restore.py:49:
        _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
        ../../venv/py3.venv/lib/python3.9/site-packages/decorator.py:232: in fun
            return caller(func, *(extras + args), **kw)
        ../../../tools/go/pygolang/golang/__init__.py:103: in _
            return f(*argv, **kw)
        zodbtools/test/test_restore.py:48: in _
            zodbrestore(stor, zdump)
        zodbtools/zodbrestore.py:39: in zodbrestore
            txn = zr.readtxn()
        zodbtools/zodbdump.py:443: in readtxn
            self._badline('unknown hash function %s' % qq(hashfunc))
        _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    
        self = <zodbtools.zodbdump.DumpReader object at 0x7f3de5d79e20>
        msg = 'unknown hash function "sha1"'
    
            def _badline(self, msg):
        >       raise RuntimeError("%s+%d: invalid line: %s (%s)" % (_ioname(self._r), self.lineno, msg, qq(self._line)))
        E       RuntimeError: /home/kirr/src/wendelin/z/zodbtools/zodbtools/test/testdata/1_!zext.zdump.raw.ok+5: invalid line: unknown hash function "sha1" ("obj 0000000000000000 61 sha1:664e6de0f153d8eaeda638d616a320c6e3c5feb1")
    
        zodbtools/zodbdump.py:382: RuntimeError
    9861c136
zodbdump.py 20.4 KB