Commit 42799cf6 authored by Kirill Smelkov's avatar Kirill Smelkov

X on py3 porting

Works, but needs pygolang@c9648c44
parent 80559a94
# -*- coding: utf-8 -*-
# Copyright (C) 2019 Nexedi SA and Contributors.
# Copyright (C) 2019-2022 Nexedi SA and Contributors.
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
......@@ -18,14 +18,20 @@
# See https://www.nexedi.com/licensing for rationale and options.
from zodbtools.zodbanalyze import analyze, report
from zodbtools.test.testutil import fs1_testdata_py23
from zodbtools.util import fromhex
import os.path
from golang import b
def test_zodbanalyze(capsys):
def test_zodbanalyze(tmpdir, capsys):
tfs1 = fs1_testdata_py23(tmpdir,
os.path.join(os.path.dirname(__file__), "testdata", "1.fs"))
for use_dbm in (False, True):
report(
analyze(
os.path.join(os.path.dirname(__file__), "testdata", "1.fs"),
tfs1,
use_dbm=use_dbm,
delta_fs=False,
tidmin=None,
......@@ -40,7 +46,7 @@ def test_zodbanalyze(capsys):
# csv output
report(
analyze(
os.path.join(os.path.dirname(__file__), "testdata", "1.fs"),
tfs1,
use_dbm=False,
delta_fs=False,
tidmin=None,
......@@ -61,14 +67,14 @@ __main__.Object,56,1880,54.366686%,33.571429,9,303,47,1577
# empty range
report(
analyze(
os.path.join(os.path.dirname(__file__), "testdata", "1.fs"),
tfs1,
use_dbm=False,
delta_fs=False,
tidmin="ffffffffffffffff",
tidmin=fromhex("ffffffffffffffff"),
tidmax=None,
),
csv=False,
)
captured = capsys.readouterr()
assert "# ø\nNo transactions processed\n" == captured.out.encode('utf-8')
assert "# ø\nNo transactions processed\n" == b(captured.out)
assert captured.err == ""
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2020 Nexedi SA and Contributors.
# Copyright (C) 2018-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
# Jérome Perrin <jerome@nexedi.com>
#
......@@ -43,8 +43,8 @@ def test_zodbcommit(zext):
# commit some transactions via zodbcommit and verify if storage dump gives
# what is expected.
t1 = Transaction(z64, ' ', b'user name', b'description ...', zext(dumps({'a': 'b'}, _protocol)), [
ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')),
ObjectData(p64(2), b'data2', 'sha1', sha1(b'data2'))])
ObjectData(p64(1), b'data1', b'sha1', sha1(b'data1')),
ObjectData(p64(2), b'data2', b'sha1', sha1(b'data2'))])
t1.tid = zodbcommit(stor, head, t1)
......
......@@ -30,15 +30,17 @@ from io import BytesIO
from os.path import dirname
from zodbtools.test.testutil import zext_supported
from zodbtools.test.testutil import zext_supported, fs1_testdata_py23
from pytest import mark, raises, xfail
# verify zodbdump output against golden
@mark.parametrize('pretty', ('raw', 'zpickledis'))
def test_zodbdump(zext, pretty):
def test_zodbdump(tmpdir, zext, pretty):
tdir = dirname(__file__)
zkind = '_!zext' if zext.disabled else ''
stor = FileStorage('%s/testdata/1%s.fs' % (tdir, zkind), read_only=True)
tfs1 = fs1_testdata_py23(tmpdir, '%s/testdata/1%s.fs' % (tdir, zkind))
stor = FileStorage(tfs1, read_only=True)
with open('%s/testdata/1%s.zdump.%s.ok' % (tdir, zkind, pretty), 'rb') as f:
dumpok = f.read()
......
......@@ -21,7 +21,8 @@
from __future__ import print_function
from zodbtools.zodbrestore import zodbrestore
from zodbtools.util import storageFromURL
from zodbtools.util import storageFromURL, readfile
from zodbtools.test.testutil import fs1_testdata_py23
from os.path import dirname
from tempfile import mkdtemp
......@@ -30,9 +31,7 @@ from golang import func, defer
# verify zodbrestore.
@func
def test_zodbrestore(zext):
tmpd = mkdtemp('', 'zodbrestore.')
defer(lambda: rmtree(tmpd))
def test_zodbrestore(tmpdir, zext):
zkind = '_!zext' if zext.disabled else ''
# restore from testdata/1.zdump.ok and verify it gives result that is
......@@ -43,18 +42,12 @@ def test_zodbrestore(zext):
zdump = open("%s/1%s.zdump.raw.ok" % (tdata, zkind), 'rb')
defer(zdump.close)
stor = storageFromURL('%s/2.fs' % tmpd)
stor = storageFromURL('%s/2.fs' % tmpdir)
defer(stor.close)
zodbrestore(stor, zdump)
_()
zfs1 = _readfile("%s/1%s.fs" % (tdata, zkind))
zfs2 = _readfile("%s/2.fs" % tmpd)
zfs1 = readfile(fs1_testdata_py23(tmpdir, "%s/1%s.fs" % (tdata, zkind)))
zfs2 = readfile("%s/2.fs" % tmpdir)
assert zfs1 == zfs2
# _readfile reads file at path.
def _readfile(path): # -> data(bytes)
with open(path, 'rb') as _:
return _.read()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2019 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
# Copyright (C) 2019-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
......@@ -27,6 +27,10 @@ import transaction
from tempfile import mkdtemp
from shutil import rmtree
from golang import func, defer
from six import PY3
from os.path import basename
from zodbtools.util import readfile, writefile
# zext_supported checks whether ZODB supports txn.extension_bytes .
_zext_supported_memo = None
......@@ -61,3 +65,19 @@ def _zext_supported():
assert last_txn.extension == {'a': 'b'}
return hasattr(last_txn, 'extension_bytes')
# fs1_testdata_py23 prepares and returns path to temprary FileStorage prepared
# from testdata with header adjusted to work on current Python.
def fs1_testdata_py23(tmpdir, path):
data = readfile(path)
index = readfile(path + ".index")
assert data[:4] == b"FS21" # FileStorage magic for Python2
if PY3:
data = b"FS30" + data[4:] # FileStorage magic for Python3
path_ = "%s/%s" % (tmpdir, basename(path))
writefile(path_, data)
writefile("%s.index" % path_, index)
return path_
# -*- coding: utf-8 -*-
# zodbtools - various utility routines
# Copyright (C) 2016-2019 Nexedi SA and Contributors.
# Copyright (C) 2016-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
# Jérome Perrin <jerome@nexedi.com>
#
......@@ -27,9 +27,12 @@ from zlib import crc32, adler32
from ZODB.TimeStamp import TimeStamp
import dateparser
from golang import b
def ashex(s):
# type: (bytes) -> bytes
return codecs.encode(s, 'hex')
# type: (bytes) -> bstr
return b(codecs.encode(s, 'hex'))
def fromhex(s):
# type: (Union[str,bytes]) -> bytes
......@@ -235,3 +238,15 @@ def asbinstream(stream):
if isinstance(stream, io.TextIOBase):
return stream.buffer
return stream
# readfile reads file at path.
def readfile(path): # -> data(bytes)
with open(path, 'rb') as _:
return _.read()
# writefile writes data to file at path.
# if the file existed before its old data is erased.
def writefile(path, data):
with open(path, 'wb') as _:
_.write(data)
......@@ -16,7 +16,7 @@ from ZODB.FileStorage import FileIterator, packed_version
from ZODB.FileStorage.format import FileStorageFormatter
from ZODB.utils import get_pickle_metadata
from zodbtools.util import storageFromURL, parse_tidrange, ashex
from golang import func, defer
from golang import func, defer, b
class DeltaFileStorage(
FileStorageFormatter,
......@@ -225,7 +225,7 @@ def analyze_rec(report, record):
report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size
else:
type = report.OIDMAP[oid]
type = b(report.OIDMAP[oid])
if report.use_dbm:
fsize = int(report.USEDMAP[oid])
report.USEDMAP[oid] = str(size)
......
# Copyright (C) 2018-2021 Nexedi SA and Contributors.
# Copyright (C) 2018-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
......@@ -40,12 +40,12 @@ can query current database head (last_tid) with `zodb info <stor> last_tid`.
from __future__ import print_function
from zodbtools import zodbdump
from zodbtools.util import ashex, fromhex, storageFromURL
from zodbtools.util import ashex, fromhex, storageFromURL, asbinstream
from ZODB.interfaces import IStorageRestoreable
from ZODB.utils import p64, u64, z64
from ZODB.POSException import POSKeyError
from ZODB._compat import BytesIO
from golang import func, defer, panic
from golang import func, defer, panic, b
import warnings
......@@ -217,9 +217,9 @@ def main(argv):
defer(stor.close)
# artificial transaction header with tid=0 to request regular commit
zin = b'txn 0000000000000000 " "\n'
zin = b('txn 0000000000000000 " "\n')
zin += sys.stdin.read()
zin += asbinstream(sys.stdin).read()
zin = BytesIO(zin)
zr = zodbdump.DumpReader(zin)
zr.lineno -= 1 # we prepended txn header
......
......@@ -65,7 +65,7 @@ TODO also protect txn record by hash.
from __future__ import print_function
from zodbtools.util import ashex, fromhex, sha1, txnobjv, parse_tidrange, TidRangeInvalid, \
storageFromURL, hashRegistry, asbinstream
from ZODB._compat import loads, _protocol, BytesIO
from ZODB._compat import loads, _protocol
from zodbpickle.slowpickle import Pickler as pyPickler
import pickletools
from ZODB.interfaces import IStorageTransactionInformation
......@@ -74,6 +74,7 @@ from zope.interface import implementer
import sys
import logging as log
import re
from io import BytesIO, StringIO
from golang.gcompat import qq
from golang import func, defer, strconv, b
......@@ -122,9 +123,9 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw', out=asbinstream
else:
out.write(b"extension\n")
extf = BytesIO(rawext)
disf = BytesIO()
disf = StringIO()
pickletools.dis(extf, disf)
out.write(indent(disf.getvalue(), " "))
out.write(b(indent(disf.getvalue(), " ")))
extra = extf.read()
if len(extra) > 0:
out.write(b" + extra data %s\n" % qq(extra))
......@@ -161,10 +162,10 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw', out=asbinstream
elif pretty == 'zpickledis':
# https://github.com/zopefoundation/ZODB/blob/5.6.0-55-g1226c9d35/src/ZODB/serialize.py#L24-L29
dataf = BytesIO(obj.data)
disf = BytesIO()
disf = StringIO()
pickletools.dis(dataf, disf) # class
pickletools.dis(dataf, disf) # state
out.write(indent(disf.getvalue(), " "))
out.write(b(indent(disf.getvalue(), " ")))
extra = dataf.read()
if len(extra) > 0:
out.write(b" + extra data %s\n" % qq(extra))
......@@ -432,7 +433,7 @@ class DumpReader(object):
else:
size = int(m.group('size'))
hashfunc = m.group('hashfunc')
hashfunc = b(m.group('hashfunc'))
hashok = fromhex(m.group('hash'))
hashonly = m.group('hashonly') is not None
data = None # see vvv
......@@ -550,7 +551,7 @@ class ObjectCopy(Object):
# ObjectData represents record with object data.
class ObjectData(Object):
# .data HashOnly | bytes
# .hashfunc str hash function used for integrity
# .hashfunc bstr hash function used for integrity
  • Without looking into details, I don't understand why isn't .hashfunc a string ?

    I'm commenting here, but what made me wonder is the patch from zodbtools/test/test_commit.py:

    -        ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')),
    +        ObjectData(p64(1), b'data1', b'sha1', sha1(b'data1')),

    and also in hashRegistry uses str as keys.

  • @jerome, thanks for feedback. I don't recall the details now, but what I recall is this:

    • I first tried to use bytes for hashfunc. Bytes, because we parse input as bytes and also we emit it as bytes. But then it forced to change hashRegistry, because, as you say, its keys are strings and in py3 accessing dict with string keys by bytes does not work. So either it was needed to change hashRegistry to use bytes as keys or use hashfunc as as string.

    • I tried to change hashRegistry to use bytes as keys, but then plain access to it e.g. as hashRegistry['sha1'] stopped to work, because 'sha1' is str, not bytes.

    • but keeping .hashfunc as regular str, if I recall correctly, is not convenient. Unfortunately now I do not recall the details why it was hard to get everything working with just regular bytes and str, but after accepting the switch to bstr, it was simply easier to keep hashfunc as bstr, becase when bytes input is parsed, it comes as bytes, and we need to extract hashfunc field and decode utf-8 decode it, while not rejecting non-utf8 hashfunc strings (not rejecting at the decode stage). So here is how the decoder implements it:

    https://lab.nexedi.com/kirr/zodbtools/blob/42799cf6/zodbtools/zodbdump.py#L434-443

    You see, to convert a raw-bytes input into string it is just hashfunc = b(m.group('hashfunc')).

    It satisfies all needed properties, and the result of decoding, while being bytes internally, can interoperate with strings just ok (e.g. used in hashRegistry as keys), while it can be also easily used as just raw bytes in bytes context, e.g. when encoding ObjectData back into raw bytes:

    https://lab.nexedi.com/kirr/zodbtools/blob/42799cf6/zodbtools/zodbdump.py#L569

    I appologize for not making proper merge-request for this change (hopefully yet), and not recalling the details.

  • Thanks @kirr it makes sense, I also started porting zodbtools to python3 many months ago and I remember experiencing something like you describe, it is more natural that the keys from hashRegistry (ie. the names of the hash functions) are strings, but we read the streams as bytes so when we read a hash function it's a bytes.

    I did not understand how this patch could work, I was thinking that this was thanks to bstr, but the two places I look were using bytes b"sha1" (the ObjectData created in test_commit.py) and "sha1" (the initialisation of hashRegistry) so I was confused, but with that explanation it is clear.

  • @jerome, thanks for feedback, and good to hear that you experienced something similar in your trials, that helps to find a common ground for the context here.

    bstr, if I recall correctly, is indeed essential for this patch. I've tried to keep its use to the miniumim, but, if I recall correctly, without bstr in a couple of key places it won't work good.

Please register or sign in to reply
# .hash_ bytes hash of the object's data
def __init__(self, oid, data, hashfunc, hash_):
super(ObjectData, self).__init__(oid)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment