Commit 9861c136 authored by Kirill Smelkov

*: Fix working on py3 by using bstr bytestring instead of raw bytes

e.g. for ObjectData .hashfunc:

In many contexts we need .hashfunc to behave like a string, e.g. for
accessing hashRegistry by keys. In many other contexts - e.g. when
zodbdump input is parsed or emitted - it is handier to handle it like
raw bytes.

If we let .hashfunc be of type str - it breaks the second mode. If it is
of type bytes - it breaks the first mode.

Also, in many places it is cumbersome to constantly encode/decode between
str and bytes, especially where an object is sometimes used in string
context and sometimes in binary context.

-> Fix it all in one go by using bytestring type from pygolang,
which provides both unicode string and binary semantics simultaneously.

This needs bstr from pygolang (see kirr/pygolang@c9648c44),
but even if pygolang comes without bstr, with this patch zodbtools
continues to work ok on py2 - only py3 mode won't work.

The list of test failures before this patch is provided below:

    _______________________________ test_zodbanalyze _______________________________

    tmpdir = local('/tmp/pytest-of-kirr/pytest-22/test_zodbanalyze0')
    capsys = <_pytest.capture.CaptureFixture object at 0x7f3de6835c70>

        def test_zodbanalyze(tmpdir, capsys):
            tfs1 = fs1_testdata_py23(tmpdir,
                            os.path.join(os.path.dirname(__file__), "testdata", "1.fs"))

            for use_dbm in (False, True):
    >           report(
                    analyze(
                        tfs1,
                        use_dbm=use_dbm,
                        delta_fs=False,
                        tidmin=None,
                        tidmax=None,
                    ),
                    csv=False,
                )

    zodbtools/test/test_analyze.py:30:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    rep = <zodbtools.zodbanalyze.Report object at 0x7f3de5e16b20>, csv = False

        def report(rep, csv=False):
            ...
                    print (fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t],
                                   pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t],
    >                              rep.COIDSMAP[t], rep.CBYTESMAP[t],
                                   rep.FOIDSMAP.get(t, 0), rep.FBYTESMAP.get(t, 0)))
    E               KeyError: b'persistent.mapping.PersistentMapping'

    zodbtools/zodbanalyze.py:147: KeyError

    ____________________________ test_zodbcommit[!zext] ____________________________

    zext = <function zext.<locals>._ at 0x7f3deb5c3e50>

        @func
        def test_zodbcommit(zext):
            tmpd = mkdtemp('', 'zodbcommit.')
            defer(lambda: rmtree(tmpd))

            stor = storageFromURL('%s/2.fs' % tmpd)
            defer(stor.close)

            head = stor.lastTransaction()

            # commit some transactions via zodbcommit and verify if storage dump gives
            # what is expected.
            t1 = Transaction(z64, ' ', b'user name', b'description ...', zext(dumps({'a': 'b'}, _protocol)), [
                ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')),
                ObjectData(p64(2), b'data2', 'sha1', sha1(b'data2'))])

            t1.tid = zodbcommit(stor, head, t1)

            t2 = Transaction(z64, ' ', b'user2', b'desc2', b'', [
                ObjectDelete(p64(2))])

            t2.tid = zodbcommit(stor, t1.tid, t2)

            buf = BytesIO()
            zodbdump(stor, p64(u64(head)+1), None, out=buf)
            dumped = buf.getvalue()

    >       assert dumped == b''.join([_.zdump() for _ in (t1, t2)])

    zodbtools/test/test_commit.py:61:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    zodbtools/test/test_commit.py:61: in <listcomp>
        assert dumped == b''.join([_.zdump() for _ in (t1, t2)])
    zodbtools/zodbdump.py:521: in zdump
        z += obj.zdump()
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    self = <zodbtools.zodbdump.ObjectData object at 0x7f3de5d26d90>

        def zdump(self):
            data = self.data
            hashonly = isinstance(data, HashOnly)
            if hashonly:
                size = data.size
            else:
                size = len(data)
    >       z = b'obj %s %d %s:%s' % (ashex(self.oid), size, self.hashfunc, ashex(self.hash_))
    E       TypeError: %b requires a bytes-like object, or an object that implements __bytes__, not 'str'

    zodbtools/zodbdump.py:569: TypeError

    _______________________________ test_dumpreader ________________________________

        def test_dumpreader():
            in_ = b"""\
        txn 0123456789abcdef " "
        user "my name"
        description "o la-la..."
        extension "zzz123 def"
        obj 0000000000000001 delete
        obj 0000000000000002 from 0123456789abcdee
        obj 0000000000000003 54 adler32:01234567 -
        obj 0000000000000004 4 sha1:9865d483bc5a94f2e30056fc256ed3066af54d04
        ZZZZ
        obj 0000000000000005 9 crc32:52fdeac5
        ABC

        DEF!

        txn 0123456789abcdf0 " "
        user "author2"
        description "zzz"
        extension "qqq"

        """

            r = DumpReader(BytesIO(in_))
    >       t1 = r.readtxn()

    zodbtools/test/test_dump.py:78:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    zodbtools/zodbdump.py:443: in readtxn
        self._badline('unknown hash function %s' % qq(hashfunc))
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    self = <zodbtools.zodbdump.DumpReader object at 0x7f3de5d69cd0>
    msg = 'unknown hash function "adler32"'

        def _badline(self, msg):
    >       raise RuntimeError("%s+%d: invalid line: %s (%s)" % (_ioname(self._r), self.lineno, msg, qq(self._line)))
    E       RuntimeError: +7: invalid line: unknown hash function "adler32" ("obj 0000000000000003 54 adler32:01234567 -")

    zodbtools/zodbdump.py:382: RuntimeError

    ___________________________ test_zodbrestore[!zext] ____________________________

    tmpdir = local('/tmp/pytest-of-kirr/pytest-22/test_zodbrestore__zext_0')
    zext = <function zext.<locals>._ at 0x7f3de5d6ddc0>

        @func
        def test_zodbrestore(tmpdir, zext):
            zkind = '_!zext' if zext.disabled else ''

            # restore from testdata/1.zdump.ok and verify it gives result that is
            # bit-to-bit identical to testdata/1.fs
            tdata = dirname(__file__) + "/testdata"
            @func
            def _():
                zdump = open("%s/1%s.zdump.raw.ok" % (tdata, zkind), 'rb')
                defer(zdump.close)

                stor = storageFromURL('%s/2.fs' % tmpdir)
                defer(stor.close)

                zodbrestore(stor, zdump)
    >       _()

    zodbtools/test/test_restore.py:49:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    ../../venv/py3.venv/lib/python3.9/site-packages/decorator.py:232: in fun
        return caller(func, *(extras + args), **kw)
    ../../../tools/go/pygolang/golang/__init__.py:103: in _
        return f(*argv, **kw)
    zodbtools/test/test_restore.py:48: in _
        zodbrestore(stor, zdump)
    zodbtools/zodbrestore.py:39: in zodbrestore
        txn = zr.readtxn()
    zodbtools/zodbdump.py:443: in readtxn
        self._badline('unknown hash function %s' % qq(hashfunc))
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    self = <zodbtools.zodbdump.DumpReader object at 0x7f3de5d79e20>
    msg = 'unknown hash function "sha1"'

        def _badline(self, msg):
    >       raise RuntimeError("%s+%d: invalid line: %s (%s)" % (_ioname(self._r), self.lineno, msg, qq(self._line)))
    E       RuntimeError: /home/kirr/src/wendelin/z/zodbtools/zodbtools/test/testdata/1_!zext.zdump.raw.ok+5: invalid line: unknown hash function "sha1" ("obj 0000000000000000 61 sha1:664e6de0f153d8eaeda638d616a320c6e3c5feb1")

    zodbtools/zodbdump.py:382: RuntimeError
parent b21fbe23
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
from zodbtools.zodbanalyze import analyze, report from zodbtools.zodbanalyze import analyze, report
from zodbtools.test.testutil import fs1_testdata_py23 from zodbtools.test.testutil import fs1_testdata_py23
import os.path import os.path
from golang import b
def test_zodbanalyze(tmpdir, capsys): def test_zodbanalyze(tmpdir, capsys):
...@@ -74,5 +75,5 @@ __main__.Object,56,1880,54.366686%,33.571429,9,303,47,1577 ...@@ -74,5 +75,5 @@ __main__.Object,56,1880,54.366686%,33.571429,9,303,47,1577
csv=False, csv=False,
) )
captured = capsys.readouterr() captured = capsys.readouterr()
assert "# ø\nNo transactions processed\n" == captured.out.encode('utf-8') assert "# ø\nNo transactions processed\n" == b(captured.out)
assert captured.err == "" assert captured.err == ""
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2018-2020 Nexedi SA and Contributors. # Copyright (C) 2018-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# Jérome Perrin <jerome@nexedi.com> # Jérome Perrin <jerome@nexedi.com>
# #
...@@ -27,7 +27,7 @@ from ZODB._compat import BytesIO, dumps, _protocol # XXX can't yet commit with ...@@ -27,7 +27,7 @@ from ZODB._compat import BytesIO, dumps, _protocol # XXX can't yet commit with
from tempfile import mkdtemp from tempfile import mkdtemp
from shutil import rmtree from shutil import rmtree
from golang import func, defer from golang import func, defer, b
# verify zodbcommit. # verify zodbcommit.
@func @func
...@@ -43,8 +43,8 @@ def test_zodbcommit(zext): ...@@ -43,8 +43,8 @@ def test_zodbcommit(zext):
# commit some transactions via zodbcommit and verify if storage dump gives # commit some transactions via zodbcommit and verify if storage dump gives
# what is expected. # what is expected.
t1 = Transaction(z64, ' ', b'user name', b'description ...', zext(dumps({'a': 'b'}, _protocol)), [ t1 = Transaction(z64, ' ', b'user name', b'description ...', zext(dumps({'a': 'b'}, _protocol)), [
ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')), ObjectData(p64(1), b'data1', b('sha1'), sha1(b'data1')),
ObjectData(p64(2), b'data2', 'sha1', sha1(b'data2'))]) ObjectData(p64(2), b'data2', b('sha1'), sha1(b'data2'))])
t1.tid = zodbcommit(stor, head, t1) t1.tid = zodbcommit(stor, head, t1)
......
...@@ -27,9 +27,12 @@ from zlib import crc32, adler32 ...@@ -27,9 +27,12 @@ from zlib import crc32, adler32
from ZODB.TimeStamp import TimeStamp from ZODB.TimeStamp import TimeStamp
import dateparser import dateparser
from golang import b
def ashex(s): def ashex(s):
# type: (bytes) -> bytes # type: (bytes) -> bstr
return codecs.encode(s, 'hex') return b(codecs.encode(s, 'hex'))
def fromhex(s): def fromhex(s):
# type: (Union[str,bytes]) -> bytes # type: (Union[str,bytes]) -> bytes
......
...@@ -16,7 +16,7 @@ from ZODB.FileStorage import FileIterator, packed_version ...@@ -16,7 +16,7 @@ from ZODB.FileStorage import FileIterator, packed_version
from ZODB.FileStorage.format import FileStorageFormatter from ZODB.FileStorage.format import FileStorageFormatter
from ZODB.utils import get_pickle_metadata from ZODB.utils import get_pickle_metadata
from zodbtools.util import storageFromURL, parse_tidrange, ashex from zodbtools.util import storageFromURL, parse_tidrange, ashex
from golang import func, defer from golang import func, defer, b
class DeltaFileStorage( class DeltaFileStorage(
FileStorageFormatter, FileStorageFormatter,
...@@ -225,7 +225,7 @@ def analyze_rec(report, record): ...@@ -225,7 +225,7 @@ def analyze_rec(report, record):
report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1 report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size
else: else:
type = report.OIDMAP[oid] type = b(report.OIDMAP[oid])
if report.use_dbm: if report.use_dbm:
fsize = int(report.USEDMAP[oid]) fsize = int(report.USEDMAP[oid])
report.USEDMAP[oid] = str(size) report.USEDMAP[oid] = str(size)
......
...@@ -45,7 +45,7 @@ from ZODB.interfaces import IStorageRestoreable ...@@ -45,7 +45,7 @@ from ZODB.interfaces import IStorageRestoreable
from ZODB.utils import p64, u64, z64 from ZODB.utils import p64, u64, z64
from ZODB.POSException import POSKeyError from ZODB.POSException import POSKeyError
from ZODB._compat import BytesIO from ZODB._compat import BytesIO
from golang import func, defer, panic from golang import func, defer, panic, b
import warnings import warnings
...@@ -217,7 +217,7 @@ def main(argv): ...@@ -217,7 +217,7 @@ def main(argv):
defer(stor.close) defer(stor.close)
# artificial transaction header with tid=0 to request regular commit # artificial transaction header with tid=0 to request regular commit
zin = b'txn 0000000000000000 " "\n' zin = b('txn 0000000000000000 " "\n')
zin += asbinstream(sys.stdin).read() zin += asbinstream(sys.stdin).read()
zin = BytesIO(zin) zin = BytesIO(zin)
......
...@@ -433,7 +433,7 @@ class DumpReader(object): ...@@ -433,7 +433,7 @@ class DumpReader(object):
else: else:
size = int(m.group('size')) size = int(m.group('size'))
hashfunc = m.group('hashfunc') hashfunc = b(m.group('hashfunc'))
hashok = fromhex(m.group('hash')) hashok = fromhex(m.group('hash'))
hashonly = m.group('hashonly') is not None hashonly = m.group('hashonly') is not None
data = None # see vvv data = None # see vvv
...@@ -551,7 +551,7 @@ class ObjectCopy(Object): ...@@ -551,7 +551,7 @@ class ObjectCopy(Object):
# ObjectData represents record with object data. # ObjectData represents record with object data.
class ObjectData(Object): class ObjectData(Object):
# .data HashOnly | bytes # .data HashOnly | bytes
# .hashfunc str hash function used for integrity # .hashfunc bstr hash function used for integrity
# .hash_ bytes hash of the object's data # .hash_ bytes hash of the object's data
def __init__(self, oid, data, hashfunc, hash_): def __init__(self, oid, data, hashfunc, hash_):
super(ObjectData, self).__init__(oid) super(ObjectData, self).__init__(oid)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment