Commit 42799cf6 authored by Kirill Smelkov's avatar Kirill Smelkov

X on py3 porting

Works, but needs pygolang@c9648c44
parent 80559a94
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2019 Nexedi SA and Contributors. # Copyright (C) 2019-2022 Nexedi SA and Contributors.
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your # it under the terms of the GNU General Public License version 3, or (at your
...@@ -18,14 +18,20 @@ ...@@ -18,14 +18,20 @@
# See https://www.nexedi.com/licensing for rationale and options. # See https://www.nexedi.com/licensing for rationale and options.
from zodbtools.zodbanalyze import analyze, report from zodbtools.zodbanalyze import analyze, report
from zodbtools.test.testutil import fs1_testdata_py23
from zodbtools.util import fromhex
import os.path import os.path
from golang import b
def test_zodbanalyze(capsys): def test_zodbanalyze(tmpdir, capsys):
tfs1 = fs1_testdata_py23(tmpdir,
os.path.join(os.path.dirname(__file__), "testdata", "1.fs"))
for use_dbm in (False, True): for use_dbm in (False, True):
report( report(
analyze( analyze(
os.path.join(os.path.dirname(__file__), "testdata", "1.fs"), tfs1,
use_dbm=use_dbm, use_dbm=use_dbm,
delta_fs=False, delta_fs=False,
tidmin=None, tidmin=None,
...@@ -40,7 +46,7 @@ def test_zodbanalyze(capsys): ...@@ -40,7 +46,7 @@ def test_zodbanalyze(capsys):
# csv output # csv output
report( report(
analyze( analyze(
os.path.join(os.path.dirname(__file__), "testdata", "1.fs"), tfs1,
use_dbm=False, use_dbm=False,
delta_fs=False, delta_fs=False,
tidmin=None, tidmin=None,
...@@ -61,14 +67,14 @@ __main__.Object,56,1880,54.366686%,33.571429,9,303,47,1577 ...@@ -61,14 +67,14 @@ __main__.Object,56,1880,54.366686%,33.571429,9,303,47,1577
# empty range # empty range
report( report(
analyze( analyze(
os.path.join(os.path.dirname(__file__), "testdata", "1.fs"), tfs1,
use_dbm=False, use_dbm=False,
delta_fs=False, delta_fs=False,
tidmin="ffffffffffffffff", tidmin=fromhex("ffffffffffffffff"),
tidmax=None, tidmax=None,
), ),
csv=False, csv=False,
) )
captured = capsys.readouterr() captured = capsys.readouterr()
assert "# ø\nNo transactions processed\n" == captured.out.encode('utf-8') assert "# ø\nNo transactions processed\n" == b(captured.out)
assert captured.err == "" assert captured.err == ""
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2018-2020 Nexedi SA and Contributors. # Copyright (C) 2018-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# Jérome Perrin <jerome@nexedi.com> # Jérome Perrin <jerome@nexedi.com>
# #
...@@ -43,8 +43,8 @@ def test_zodbcommit(zext): ...@@ -43,8 +43,8 @@ def test_zodbcommit(zext):
# commit some transactions via zodbcommit and verify if storage dump gives # commit some transactions via zodbcommit and verify if storage dump gives
# what is expected. # what is expected.
t1 = Transaction(z64, ' ', b'user name', b'description ...', zext(dumps({'a': 'b'}, _protocol)), [ t1 = Transaction(z64, ' ', b'user name', b'description ...', zext(dumps({'a': 'b'}, _protocol)), [
ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')), ObjectData(p64(1), b'data1', b'sha1', sha1(b'data1')),
ObjectData(p64(2), b'data2', 'sha1', sha1(b'data2'))]) ObjectData(p64(2), b'data2', b'sha1', sha1(b'data2'))])
t1.tid = zodbcommit(stor, head, t1) t1.tid = zodbcommit(stor, head, t1)
......
...@@ -30,15 +30,17 @@ from io import BytesIO ...@@ -30,15 +30,17 @@ from io import BytesIO
from os.path import dirname from os.path import dirname
from zodbtools.test.testutil import zext_supported from zodbtools.test.testutil import zext_supported, fs1_testdata_py23
from pytest import mark, raises, xfail from pytest import mark, raises, xfail
# verify zodbdump output against golden # verify zodbdump output against golden
@mark.parametrize('pretty', ('raw', 'zpickledis')) @mark.parametrize('pretty', ('raw', 'zpickledis'))
def test_zodbdump(zext, pretty): def test_zodbdump(tmpdir, zext, pretty):
tdir = dirname(__file__) tdir = dirname(__file__)
zkind = '_!zext' if zext.disabled else '' zkind = '_!zext' if zext.disabled else ''
stor = FileStorage('%s/testdata/1%s.fs' % (tdir, zkind), read_only=True) tfs1 = fs1_testdata_py23(tmpdir, '%s/testdata/1%s.fs' % (tdir, zkind))
stor = FileStorage(tfs1, read_only=True)
with open('%s/testdata/1%s.zdump.%s.ok' % (tdir, zkind, pretty), 'rb') as f: with open('%s/testdata/1%s.zdump.%s.ok' % (tdir, zkind, pretty), 'rb') as f:
dumpok = f.read() dumpok = f.read()
......
...@@ -21,7 +21,8 @@ ...@@ -21,7 +21,8 @@
from __future__ import print_function from __future__ import print_function
from zodbtools.zodbrestore import zodbrestore from zodbtools.zodbrestore import zodbrestore
from zodbtools.util import storageFromURL from zodbtools.util import storageFromURL, readfile
from zodbtools.test.testutil import fs1_testdata_py23
from os.path import dirname from os.path import dirname
from tempfile import mkdtemp from tempfile import mkdtemp
...@@ -30,9 +31,7 @@ from golang import func, defer ...@@ -30,9 +31,7 @@ from golang import func, defer
# verify zodbrestore. # verify zodbrestore.
@func @func
def test_zodbrestore(zext): def test_zodbrestore(tmpdir, zext):
tmpd = mkdtemp('', 'zodbrestore.')
defer(lambda: rmtree(tmpd))
zkind = '_!zext' if zext.disabled else '' zkind = '_!zext' if zext.disabled else ''
# restore from testdata/1.zdump.ok and verify it gives result that is # restore from testdata/1.zdump.ok and verify it gives result that is
...@@ -43,18 +42,12 @@ def test_zodbrestore(zext): ...@@ -43,18 +42,12 @@ def test_zodbrestore(zext):
zdump = open("%s/1%s.zdump.raw.ok" % (tdata, zkind), 'rb') zdump = open("%s/1%s.zdump.raw.ok" % (tdata, zkind), 'rb')
defer(zdump.close) defer(zdump.close)
stor = storageFromURL('%s/2.fs' % tmpd) stor = storageFromURL('%s/2.fs' % tmpdir)
defer(stor.close) defer(stor.close)
zodbrestore(stor, zdump) zodbrestore(stor, zdump)
_() _()
zfs1 = _readfile("%s/1%s.fs" % (tdata, zkind)) zfs1 = readfile(fs1_testdata_py23(tmpdir, "%s/1%s.fs" % (tdata, zkind)))
zfs2 = _readfile("%s/2.fs" % tmpd) zfs2 = readfile("%s/2.fs" % tmpdir)
assert zfs1 == zfs2 assert zfs1 == zfs2
# _readfile reads file at path.
def _readfile(path): # -> data(bytes)
with open(path, 'rb') as _:
return _.read()
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2019 Nexedi SA and Contributors. # Copyright (C) 2019-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your # it under the terms of the GNU General Public License version 3, or (at your
...@@ -27,6 +27,10 @@ import transaction ...@@ -27,6 +27,10 @@ import transaction
from tempfile import mkdtemp from tempfile import mkdtemp
from shutil import rmtree from shutil import rmtree
from golang import func, defer from golang import func, defer
from six import PY3
from os.path import basename
from zodbtools.util import readfile, writefile
# zext_supported checks whether ZODB supports txn.extension_bytes . # zext_supported checks whether ZODB supports txn.extension_bytes .
_zext_supported_memo = None _zext_supported_memo = None
...@@ -61,3 +65,19 @@ def _zext_supported(): ...@@ -61,3 +65,19 @@ def _zext_supported():
assert last_txn.extension == {'a': 'b'} assert last_txn.extension == {'a': 'b'}
return hasattr(last_txn, 'extension_bytes') return hasattr(last_txn, 'extension_bytes')
# fs1_testdata_py23 prepares and returns path to temprary FileStorage prepared
# from testdata with header adjusted to work on current Python.
def fs1_testdata_py23(tmpdir, path):
data = readfile(path)
index = readfile(path + ".index")
assert data[:4] == b"FS21" # FileStorage magic for Python2
if PY3:
data = b"FS30" + data[4:] # FileStorage magic for Python3
path_ = "%s/%s" % (tmpdir, basename(path))
writefile(path_, data)
writefile("%s.index" % path_, index)
return path_
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# zodbtools - various utility routines # zodbtools - various utility routines
# Copyright (C) 2016-2019 Nexedi SA and Contributors. # Copyright (C) 2016-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# Jérome Perrin <jerome@nexedi.com> # Jérome Perrin <jerome@nexedi.com>
# #
...@@ -27,9 +27,12 @@ from zlib import crc32, adler32 ...@@ -27,9 +27,12 @@ from zlib import crc32, adler32
from ZODB.TimeStamp import TimeStamp from ZODB.TimeStamp import TimeStamp
import dateparser import dateparser
from golang import b
def ashex(s): def ashex(s):
# type: (bytes) -> bytes # type: (bytes) -> bstr
return codecs.encode(s, 'hex') return b(codecs.encode(s, 'hex'))
def fromhex(s): def fromhex(s):
# type: (Union[str,bytes]) -> bytes # type: (Union[str,bytes]) -> bytes
...@@ -235,3 +238,15 @@ def asbinstream(stream): ...@@ -235,3 +238,15 @@ def asbinstream(stream):
if isinstance(stream, io.TextIOBase): if isinstance(stream, io.TextIOBase):
return stream.buffer return stream.buffer
return stream return stream
# readfile reads file at path.
def readfile(path): # -> data(bytes)
with open(path, 'rb') as _:
return _.read()
# writefile writes data to file at path.
# if the file existed before its old data is erased.
def writefile(path, data):
with open(path, 'wb') as _:
_.write(data)
...@@ -16,7 +16,7 @@ from ZODB.FileStorage import FileIterator, packed_version ...@@ -16,7 +16,7 @@ from ZODB.FileStorage import FileIterator, packed_version
from ZODB.FileStorage.format import FileStorageFormatter from ZODB.FileStorage.format import FileStorageFormatter
from ZODB.utils import get_pickle_metadata from ZODB.utils import get_pickle_metadata
from zodbtools.util import storageFromURL, parse_tidrange, ashex from zodbtools.util import storageFromURL, parse_tidrange, ashex
from golang import func, defer from golang import func, defer, b
class DeltaFileStorage( class DeltaFileStorage(
FileStorageFormatter, FileStorageFormatter,
...@@ -225,7 +225,7 @@ def analyze_rec(report, record): ...@@ -225,7 +225,7 @@ def analyze_rec(report, record):
report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1 report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size
else: else:
type = report.OIDMAP[oid] type = b(report.OIDMAP[oid])
if report.use_dbm: if report.use_dbm:
fsize = int(report.USEDMAP[oid]) fsize = int(report.USEDMAP[oid])
report.USEDMAP[oid] = str(size) report.USEDMAP[oid] = str(size)
......
# Copyright (C) 2018-2021 Nexedi SA and Contributors. # Copyright (C) 2018-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -40,12 +40,12 @@ can query current database head (last_tid) with `zodb info <stor> last_tid`. ...@@ -40,12 +40,12 @@ can query current database head (last_tid) with `zodb info <stor> last_tid`.
from __future__ import print_function from __future__ import print_function
from zodbtools import zodbdump from zodbtools import zodbdump
from zodbtools.util import ashex, fromhex, storageFromURL from zodbtools.util import ashex, fromhex, storageFromURL, asbinstream
from ZODB.interfaces import IStorageRestoreable from ZODB.interfaces import IStorageRestoreable
from ZODB.utils import p64, u64, z64 from ZODB.utils import p64, u64, z64
from ZODB.POSException import POSKeyError from ZODB.POSException import POSKeyError
from ZODB._compat import BytesIO from ZODB._compat import BytesIO
from golang import func, defer, panic from golang import func, defer, panic, b
import warnings import warnings
...@@ -217,9 +217,9 @@ def main(argv): ...@@ -217,9 +217,9 @@ def main(argv):
defer(stor.close) defer(stor.close)
# artificial transaction header with tid=0 to request regular commit # artificial transaction header with tid=0 to request regular commit
zin = b'txn 0000000000000000 " "\n' zin = b('txn 0000000000000000 " "\n')
zin += sys.stdin.read() zin += asbinstream(sys.stdin).read()
zin = BytesIO(zin) zin = BytesIO(zin)
zr = zodbdump.DumpReader(zin) zr = zodbdump.DumpReader(zin)
zr.lineno -= 1 # we prepended txn header zr.lineno -= 1 # we prepended txn header
......
...@@ -65,7 +65,7 @@ TODO also protect txn record by hash. ...@@ -65,7 +65,7 @@ TODO also protect txn record by hash.
from __future__ import print_function from __future__ import print_function
from zodbtools.util import ashex, fromhex, sha1, txnobjv, parse_tidrange, TidRangeInvalid, \ from zodbtools.util import ashex, fromhex, sha1, txnobjv, parse_tidrange, TidRangeInvalid, \
storageFromURL, hashRegistry, asbinstream storageFromURL, hashRegistry, asbinstream
from ZODB._compat import loads, _protocol, BytesIO from ZODB._compat import loads, _protocol
from zodbpickle.slowpickle import Pickler as pyPickler from zodbpickle.slowpickle import Pickler as pyPickler
import pickletools import pickletools
from ZODB.interfaces import IStorageTransactionInformation from ZODB.interfaces import IStorageTransactionInformation
...@@ -74,6 +74,7 @@ from zope.interface import implementer ...@@ -74,6 +74,7 @@ from zope.interface import implementer
import sys import sys
import logging as log import logging as log
import re import re
from io import BytesIO, StringIO
from golang.gcompat import qq from golang.gcompat import qq
from golang import func, defer, strconv, b from golang import func, defer, strconv, b
...@@ -122,9 +123,9 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw', out=asbinstream ...@@ -122,9 +123,9 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw', out=asbinstream
else: else:
out.write(b"extension\n") out.write(b"extension\n")
extf = BytesIO(rawext) extf = BytesIO(rawext)
disf = BytesIO() disf = StringIO()
pickletools.dis(extf, disf) pickletools.dis(extf, disf)
out.write(indent(disf.getvalue(), " ")) out.write(b(indent(disf.getvalue(), " ")))
extra = extf.read() extra = extf.read()
if len(extra) > 0: if len(extra) > 0:
out.write(b" + extra data %s\n" % qq(extra)) out.write(b" + extra data %s\n" % qq(extra))
...@@ -161,10 +162,10 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw', out=asbinstream ...@@ -161,10 +162,10 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw', out=asbinstream
elif pretty == 'zpickledis': elif pretty == 'zpickledis':
# https://github.com/zopefoundation/ZODB/blob/5.6.0-55-g1226c9d35/src/ZODB/serialize.py#L24-L29 # https://github.com/zopefoundation/ZODB/blob/5.6.0-55-g1226c9d35/src/ZODB/serialize.py#L24-L29
dataf = BytesIO(obj.data) dataf = BytesIO(obj.data)
disf = BytesIO() disf = StringIO()
pickletools.dis(dataf, disf) # class pickletools.dis(dataf, disf) # class
pickletools.dis(dataf, disf) # state pickletools.dis(dataf, disf) # state
out.write(indent(disf.getvalue(), " ")) out.write(b(indent(disf.getvalue(), " ")))
extra = dataf.read() extra = dataf.read()
if len(extra) > 0: if len(extra) > 0:
out.write(b" + extra data %s\n" % qq(extra)) out.write(b" + extra data %s\n" % qq(extra))
...@@ -432,7 +433,7 @@ class DumpReader(object): ...@@ -432,7 +433,7 @@ class DumpReader(object):
else: else:
size = int(m.group('size')) size = int(m.group('size'))
hashfunc = m.group('hashfunc') hashfunc = b(m.group('hashfunc'))
hashok = fromhex(m.group('hash')) hashok = fromhex(m.group('hash'))
hashonly = m.group('hashonly') is not None hashonly = m.group('hashonly') is not None
data = None # see vvv data = None # see vvv
...@@ -550,7 +551,7 @@ class ObjectCopy(Object): ...@@ -550,7 +551,7 @@ class ObjectCopy(Object):
# ObjectData represents record with object data. # ObjectData represents record with object data.
class ObjectData(Object): class ObjectData(Object):
# .data HashOnly | bytes # .data HashOnly | bytes
# .hashfunc str hash function used for integrity # .hashfunc bstr hash function used for integrity
  • Without looking into details, I don't understand why isn't .hashfunc a string ?

    I'm commenting here, but what made me wonder is the patch from zodbtools/test/test_commit.py:

    -        ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')),
    +        ObjectData(p64(1), b'data1', b'sha1', sha1(b'data1')),

    and also in hashRegistry uses str as keys.

  • @jerome, thanks for feedback. I don't recall the details now, but what I recall is this:

    • I first tried to use bytes for hashfunc. Bytes, because we parse input as bytes and also we emit it as bytes. But then it forced to change hashRegistry, because, as you say, its keys are strings and in py3 accessing dict with string keys by bytes does not work. So either it was needed to change hashRegistry to use bytes as keys or use hashfunc as as string.

    • I tried to change hashRegistry to use bytes as keys, but then plain access to it e.g. as hashRegistry['sha1'] stopped to work, because 'sha1' is str, not bytes.

    • but keeping .hashfunc as regular str, if I recall correctly, is not convenient. Unfortunately now I do not recall the details why it was hard to get everything working with just regular bytes and str, but after accepting the switch to bstr, it was simply easier to keep hashfunc as bstr, becase when bytes input is parsed, it comes as bytes, and we need to extract hashfunc field and decode utf-8 decode it, while not rejecting non-utf8 hashfunc strings (not rejecting at the decode stage). So here is how the decoder implements it:

    https://lab.nexedi.com/kirr/zodbtools/blob/42799cf6/zodbtools/zodbdump.py#L434-443

    You see, to convert a raw-bytes input into string it is just hashfunc = b(m.group('hashfunc')).

    It satisfies all needed properties, and the result of decoding, while being bytes internally, can interoperate with strings just ok (e.g. used in hashRegistry as keys), while it can be also easily used as just raw bytes in bytes context, e.g. when encoding ObjectData back into raw bytes:

    https://lab.nexedi.com/kirr/zodbtools/blob/42799cf6/zodbtools/zodbdump.py#L569

    I appologize for not making proper merge-request for this change (hopefully yet), and not recalling the details.

  • Thanks @kirr it makes sense, I also started porting zodbtools to python3 many months ago and I remember experiencing something like you describe, it is more natural that the keys from hashRegistry (ie. the names of the hash functions) are strings, but we read the streams as bytes so when we read a hash function it's a bytes.

    I did not understand how this patch could work, I was thinking that this was thanks to bstr, but the two places I look were using bytes b"sha1" (the ObjectData created in test_commit.py) and "sha1" (the initialisation of hashRegistry) so I was confused, but with that explanation it is clear.

  • @jerome, thanks for feedback, and good to hear that you experienced something similar in your trials, that helps to find a common ground for the context here.

    bstr, if I recall correctly, is indeed essential for this patch. I've tried to keep its use to the miniumim, but, if I recall correctly, without bstr in a couple of key places it won't work good.

Please register or sign in to reply
# .hash_ bytes hash of the object's data # .hash_ bytes hash of the object's data
def __init__(self, oid, data, hashfunc, hash_): def __init__(self, oid, data, hashfunc, hash_):
super(ObjectData, self).__init__(oid) super(ObjectData, self).__init__(oid)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment