Commit 347ea21d authored by Jérome Perrin

util: add type annotations and fix bytes vs str

parent 72011755
[mypy]
# XXX according to mypy doc, this is a bad idea.
ignore_missing_imports = True
......@@ -19,10 +19,15 @@
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
try:
from typing import Tuple
except ImportError:
pass
from collections import OrderedDict
# topic_name -> (topic_summary, topic_help)
topic_dict = OrderedDict()
topic_dict = OrderedDict() # type: OrderedDict[str, Tuple[str, str]]
help_zurl = """\
Almost every zodb command works with a database.
......
......@@ -41,8 +41,8 @@ def test_zodbcommit(zext):
# commit some transactions via zodbcommit and verify if storage dump gives
# what is expected.
t1 = Transaction(z64, ' ', b'user name', b'description ...', zext(dumps({'a': 'b'}, _protocol)), [
ObjectData(p64(1), b'data1', 'sha1', sha1('data1')),
ObjectData(p64(2), b'data2', 'sha1', sha1('data2'))])
ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')),
ObjectData(p64(2), b'data2', 'sha1', sha1(b'data2'))])
t1.tid = zodbcommit(stor, head, t1)
......@@ -56,7 +56,7 @@ def test_zodbcommit(zext):
zodbdump(stor, p64(u64(head)+1), None, out=buf)
dumped = buf.getvalue()
assert dumped == ''.join([_.zdump() for _ in (t1, t2)])
assert dumped == b''.join([_.zdump() for _ in (t1, t2)])
# ObjectCopy. XXX zodbcommit handled ObjectCopy by actually copying data,
# not referencing previous transaction via backpointer.
......
......@@ -21,14 +21,14 @@ from zodbtools.zodbdump import (
zodbdump, DumpReader, Transaction, ObjectDelete, ObjectCopy,
ObjectData, HashOnly
)
from zodbtools.util import fromhex
from ZODB.FileStorage import FileStorage
from ZODB.utils import p64
from cStringIO import StringIO
from io import BytesIO
from os.path import dirname
from zodbtools.test.testutil import zext_supported
from pytest import raises, xfail
from pytest import raises
# verify zodbdump output against golden
def test_zodbdump(zext):
......@@ -39,7 +39,7 @@ def test_zodbdump(zext):
with open('%s/testdata/1%s.zdump.ok' % (tdir, zkind)) as f:
dumpok = f.read()
out = StringIO()
out = BytesIO()
zodbdump(stor, None, None, out=out)
assert out.getvalue() == dumpok
......@@ -69,10 +69,10 @@ extension "qqq"
"""
r = DumpReader(StringIO(in_))
r = DumpReader(BytesIO(in_))
t1 = r.readtxn()
assert isinstance(t1, Transaction)
assert t1.tid == '0123456789abcdef'.decode('hex')
assert t1.tid == fromhex('0123456789abcdef')
assert t1.user == b'my name'
assert t1.description == b'o la-la...'
assert t1.extension_bytes == b'zzz123 def'
......@@ -83,29 +83,29 @@ extension "qqq"
_ = t1.objv[1]
assert isinstance(_, ObjectCopy)
assert _.oid == p64(2)
assert _.copy_from == '0123456789abcdee'.decode('hex')
assert _.copy_from == fromhex('0123456789abcdee')
_ = t1.objv[2]
assert isinstance(_, ObjectData)
assert _.oid == p64(3)
assert _.data == HashOnly(54)
assert _.hashfunc == 'adler32'
assert _.hash_ == '01234567'.decode('hex')
assert _.hash_ == fromhex('01234567')
_ = t1.objv[3]
assert isinstance(_, ObjectData)
assert _.oid == p64(4)
assert _.data == b'ZZZZ'
assert _.hashfunc == 'sha1'
assert _.hash_ == '9865d483bc5a94f2e30056fc256ed3066af54d04'.decode('hex')
assert _.hash_ == fromhex('9865d483bc5a94f2e30056fc256ed3066af54d04')
_ = t1.objv[4]
assert isinstance(_, ObjectData)
assert _.oid == p64(5)
assert _.data == b'ABC\n\nDEF!'
assert _.hashfunc == 'crc32'
assert _.hash_ == '52fdeac5'.decode('hex')
assert _.hash_ == fromhex('52fdeac5')
t2 = r.readtxn()
assert isinstance(t2, Transaction)
assert t2.tid == '0123456789abcdf0'.decode('hex')
assert t2.tid == fromhex('0123456789abcdf0')
assert t2.user == b'author2'
assert t2.description == b'zzz'
assert t2.extension_bytes == b'qqq'
......@@ -113,11 +113,11 @@ extension "qqq"
assert r.readtxn() == None
z = ''.join([_.zdump() for _ in (t1, t2)])
z = b''.join([_.zdump() for _ in (t1, t2)])
assert z == in_
# unknown hash function
r = DumpReader(StringIO("""\
r = DumpReader(BytesIO(b"""\
txn 0000000000000000 " "
user ""
description ""
......@@ -130,7 +130,7 @@ obj 0000000000000001 1 xyz:0123 -
assert exc.value.args == ("""+5: invalid line: unknown hash function "xyz" ('obj 0000000000000001 1 xyz:0123 -')""",)
# data integrity error
r = DumpReader(StringIO("""\
r = DumpReader(BytesIO(b"""\
txn 0000000000000000 " "
user ""
description ""
......
......@@ -22,7 +22,9 @@ try:
from unittest import mock
except ImportError:
# BBB python2
import mock
import mock # type: ignore
# mypy complains: error: Name 'mock' already defined (by an import)
# https://github.com/python/mypy/issues/1153#issuecomment-253842414
import pytest
......
......@@ -18,20 +18,32 @@
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
try:
from typing import Tuple, Optional, Union, Iterable, Any, Mapping, Callable
except ImportError:
pass
import hashlib, struct, codecs
import zodburi
import zodburi # type: ignore
from six.moves.urllib_parse import urlsplit, urlunsplit
from zlib import crc32, adler32
from ZODB.TimeStamp import TimeStamp
import dateparser
# XXX note that for ashex and fromhex I run mypy with a typeshed patch
# https://github.com/python/typeshed/issues/300#issuecomment-459151016
def ashex(s):
    # type: (bytes) -> str
    """Return the hexadecimal representation of byte string `s`.

    The conversion goes through the `hex` codec of the `codecs` module
    instead of `s.encode('hex')`, because bytes objects have no `.encode`
    method on Python 3. The codec yields bytes, which are then decoded so
    that callers receive text.
    """
    return codecs.encode(s, 'hex').decode()
def fromhex(s):
    # type: (Union[str,bytes]) -> bytes
    """Decode hexadecimal string `s` and return the bytes it spells.

    This is the inverse of `ashex`; the work is delegated to the `hex`
    codec of the `codecs` module.
    """
    raw = codecs.decode(s, 'hex')
    return raw
def sha1(data):
    # type: (bytes) -> bytes
    """Return the raw (binary, not hex) SHA-1 digest of `data`."""
    return hashlib.sha1(data).digest()
......@@ -53,6 +65,8 @@ def nextitem(it):
# objects of a IStorageTransactionInformation
def txnobjv(txn):
# type: (Any) -> Iterable[Any]
# XXX type ?
objv = []
for obj in txn:
assert obj.tid == txn.tid
......@@ -72,6 +86,7 @@ class TidRangeInvalid(ValueError):
def parse_tid(tid_string, raw_only=False):
# type: (str, bool) -> bytes
"""Try to parse `tid_string` as a time and returns the
corresponding raw TID.
If `tid_string` cannot be parsed as a time, assume it was
......@@ -121,19 +136,16 @@ def parse_tid(tid_string, raw_only=False):
#
# see `zodb help tidrange` for accepted tidrange syntax.
def parse_tidrange(tidrange):
    # type: (str) -> Tuple[Optional[bytes], Optional[bytes]]
    """Parse `tidrange` of the form "<tidmin>..<tidmax>" into a pair of raw TIDs.

    Each non-empty side is converted with `parse_tid`. An empty side is
    returned as None.

    Raises TidRangeInvalid when `tidrange` does not consist of exactly two
    parts separated by "..".
    """
    try:
        tidmin, tidmax = tidrange.split("..")
    except ValueError:  # not exactly 2 parts in between ".."
        raise TidRangeInvalid(tidrange)

    # empty tid means -inf / +inf respectively
    # ( which is None in IStorage.iterator() )
    #
    # The str inputs are deliberately not rebound to the parsed bytes, so
    # that every name keeps a single type.
    return (parse_tid(tidmin) if tidmin else None,
            parse_tid(tidmax) if tidmax else None)
# storageFromURL opens a ZODB-storage specified by url
......@@ -169,12 +181,15 @@ class NullHasher:
digest_size = 1
def update(self, data):
    # type: (bytes) -> None
    """Accept `data` and ignore it; nothing is accumulated."""
    return None
def digest(self):
    # type: () -> bytes
    """Return the constant one-byte digest (a single NUL byte)."""
    null_digest = b'\0'
    return null_digest
def hexdigest(self):
    # type: () -> str
    """Return the constant digest in hexadecimal text form."""
    null_hexdigest = "00"
    return null_hexdigest
# adler32 in hashlib interface
......@@ -183,15 +198,19 @@ class Adler32Hasher:
digest_size = 4
def __init__(self):
    # type: () -> None
    """Start the running checksum from the Adler-32 value of empty input."""
    # adler32 only accepts bytes-like input on Python 3, hence b'' not ''.
    self._h = adler32(b'')
def update(self, data):
    # type: (bytes) -> None
    """Fold `data` into the running Adler-32 checksum."""
    running = self._h
    self._h = adler32(data, running)
def digest(self):
    # type: () -> bytes
    """Return the checksum as 4 bytes, big-endian unsigned."""
    # Mask to 32 bits so that a negative checksum still packs as unsigned.
    unsigned = self._h & 0xffffffff
    return struct.pack('>I', unsigned)
def hexdigest(self):
    # type: () -> str
    """Return the checksum as 8 lowercase hexadecimal digits."""
    # Mask to 32 bits so that a negative checksum formats as unsigned.
    unsigned = self._h & 0xffffffff
    return '%08x' % unsigned
# crc32 in hashlib interface
......@@ -200,15 +219,19 @@ class CRC32Hasher:
digest_size = 4
def __init__(self):
    # type: () -> None
    """Start the running checksum from the CRC-32 value of empty input."""
    # crc32 only accepts bytes-like input on Python 3, hence b'' not ''.
    self._h = crc32(b'')
def update(self, data):
    # type: (bytes) -> None
    """Fold `data` into the running CRC-32 checksum."""
    running = self._h
    self._h = crc32(data, running)
def digest(self):
    # type: () -> bytes
    """Return the checksum as 4 bytes, big-endian unsigned."""
    # Mask to 32 bits so that a negative checksum still packs as unsigned.
    unsigned = self._h & 0xffffffff
    return struct.pack('>I', unsigned)
def hexdigest(self):
    # type: () -> str
    """Return the checksum as 8 lowercase hexadecimal digits."""
    # Mask to 32 bits so that a negative checksum formats as unsigned.
    unsigned = self._h & 0xffffffff
    return '%08x' % unsigned
# {} name -> hasher
......@@ -219,4 +242,4 @@ hashRegistry = {
"sha1": hashlib.sha1,
"sha256": hashlib.sha256,
"sha512": hashlib.sha512,
}
} # type: Mapping[str, Callable] # XXX "Callable" is a bit too wide typing
......@@ -9,7 +9,7 @@ from __future__ import print_function
import sys
import os
import getopt
from six.moves import dbm_gnu as dbm
from six.moves import dbm_gnu as dbm # type: ignore
import tempfile
import shutil
from ZODB.FileStorage import FileIterator, packed_version
......
......@@ -53,6 +53,10 @@ TODO also protect txn record by hash.
"""
from __future__ import print_function
try:
from typing import Any, Set, Optional, BinaryIO, NoReturn, Union
except ImportError:
pass
from zodbtools.util import ashex, fromhex, sha1, txnobjv, parse_tidrange, TidRangeInvalid, \
storageFromURL, hashRegistry
from ZODB._compat import loads, _protocol, BytesIO
......@@ -61,6 +65,7 @@ from zodbpickle.slowpickle import Pickler as pyPickler
from ZODB.interfaces import IStorageTransactionInformation
from zope.interface import implementer
import six
import sys
import logging
import re
......@@ -92,50 +97,53 @@ def txn_raw_extension(stor, txn):
return serializeext(txn.extension)
# set of storage names already warned for not providing IStorageTransactionInformationRaw
_already_warned_notxnraw = set()
_already_warned_notxnraw = set() # type: Set[str]
# zodbdump dumps content of a ZODB storage to a file.
# please see module doc-string for dump format and details
def zodbdump(stor, tidmin, tidmax, hashonly=False, out=None):
    # type: (Any, Optional[bytes], Optional[bytes], bool, Optional[BinaryIO]) -> None
    """Dump the transactions yielded by `stor.iterator(tidmin, tidmax)` to `out`.

    Everything is written to `out` as bytes. For every transaction a
    header (txn/user/description/extension) is written, followed by one
    "obj" entry per object and a terminating empty line.

    stor     -- storage to dump; only its `.iterator(tidmin, tidmax)` is used here.
    tidmin   -- lower TID bound passed to `stor.iterator`.
    tidmax   -- upper TID bound passed to `stor.iterator`.
    hashonly -- if true, write " -" in place of object data, so that only
                the size and hash of each data record are dumped.
    out      -- binary stream to write to. When None, the binary layer of
                the current `sys.stdout` is used.
    """
    if out is None:
        # Resolve the default at call time, not at import time:
        # - sys.stdout has no `.buffer` on Python 2, where sys.stdout itself
        #   already accepts byte strings;
        # - sys.stdout may have been replaced since this module was imported.
        out = getattr(sys.stdout, 'buffer', sys.stdout)

    for txn in stor.iterator(tidmin, tidmax):
        # XXX .status not covered by IStorageTransactionInformation
        # XXX but covered by BaseStorage.TransactionRecord
        out.write(("txn %s %s\nuser %s\ndescription %s\nextension %s\n" % (
            ashex(txn.tid), qq(txn.status),
            qq(txn.user),
            qq(txn.description),
            qq(txn_raw_extension(stor, txn)) )).encode())

        objv = txnobjv(txn)

        for obj in objv:
            entry = b"obj %s " % ashex(obj.oid).encode()
            write_data = False

            if obj.data is None:
                entry += b"delete"

            # was undo and data taken from obj.data_txn
            elif obj.data_txn is not None:
                # ashex returns text, so it must be encoded before it is
                # interpolated into a bytes template.
                entry += b"from %s" % ashex(obj.data_txn).encode()

            else:
                # XXX sha1 is hardcoded for now. Dump format allows other hashes.
                entry += b"%i sha1:%s" % (len(obj.data), ashex(sha1(obj.data)).encode())
                write_data = True

            # entry is already bytes here; no further encoding is needed.
            out.write(entry)

            if write_data:
                if hashonly:
                    out.write(b" -")
                else:
                    out.write(b"\n")
                    # obj.data is not None in this branch; `or b""` only
                    # narrows the Optional for the type checker.
                    out.write(obj.data or b"")

            out.write(b"\n")

        out.write(b"\n")
# ----------------------------------------
# XPickler is Pickler that tries to save objects stably
......@@ -309,13 +317,15 @@ class DumpReader(object):
# .lineno - line number position in read stream
def __init__(self, r):
    # type: (BinaryIO) -> None
    """Create a reader that takes zodbdump-format input from binary stream `r`."""
    self._r = r             # type: BinaryIO
    self._line = None       # type: Optional[bytes] # last read line
    self.lineno = 0         # line number position in read stream
def _readline(self):
# type: () -> Optional[bytes]
l = self._r.readline()
if l == '':
if l == b'':
self._line = None
return None # EOF
......@@ -326,11 +336,17 @@ class DumpReader(object):
# report a problem found around currently-read line
def _badline(self, msg):
    # type: (str) -> NoReturn
    """Report a problem found around the currently-read line.

    Always raises RuntimeError with a message of the form
    "<ioname>+<lineno>: invalid line: <msg> (<repr of last read line>)".
    """
    line = self._line
    # self._line is reset to None at EOF and after raw object data has been
    # read. Leave None as is, so that the problem is still reported as
    # RuntimeError and not as AttributeError on None.
    if line is not None:
        # BBB produce same output in python 2 and 3
        line = line.decode() if six.PY3 else line.encode()
    raise RuntimeError("%s+%d: invalid line: %s (%r)" % (
        _ioname(self._r), self.lineno, msg, line))
# readtxn reads one transaction record from input stream and returns
# Transaction instance or None at EOF.
def readtxn(self):
# type: () -> Optional[Transaction]
# header
l = self._readline()
if l is None:
......@@ -356,7 +372,7 @@ class DumpReader(object):
objv = []
while 1:
l = self._readline()
if l == '':
if l == b'':
break # empty line - end of transaction
if l is None or not l.startswith(b'obj '):
......@@ -366,7 +382,7 @@ class DumpReader(object):
if m is None:
self._badline('invalid obj entry')
obj = None # will be Object*
obj = None # type: Optional[Union[ObjectDelete, ObjectCopy, ObjectData]]
oid = fromhex(m.group('oid'))
from_ = m.group('from')
......@@ -380,10 +396,10 @@ class DumpReader(object):
else:
size = int(m.group('size'))
hashfunc = m.group('hashfunc')
hashfunc = m.group('hashfunc').decode()
hashok = fromhex(m.group('hash'))
hashonly = m.group('hashonly') is not None
data = None # see vvv
data = None # type: Optional[Union[HashOnly, bytes]] # see vvv
hcls = hashRegistry.get(hashfunc)
if hcls is None:
......@@ -399,7 +415,7 @@ class DumpReader(object):
chunk = self._r.read(n)
data += chunk
n -= len(chunk)
self.lineno += data.count('\n')
self.lineno += data.count(b'\n')
self._line = None
if data[-1:] != b'\n':
raise RuntimeError('%s+%d: no LF after obj data' % (_ioname(self._r), self.lineno))
......@@ -460,13 +476,14 @@ class Transaction(object):
# zdump returns text representation of a record in zodbdump format.
def zdump(self):
    # type: () -> bytes
    """Return the representation of this record in zodbdump format, as bytes.

    The result is the transaction header (txn/user/description/extension
    lines), followed by the dump of every object in `self.objv` and a
    terminating empty line.
    """
    # ashex and qq results are encoded before being interpolated into the
    # bytes templates below.
    parts = [
        b'txn %s %s\n' % (ashex(self.tid).encode(), qq(self.status).encode()),
        b'user %s\n' % qq(self.user).encode(),
        b'description %s\n' % qq(self.description).encode(),
        b'extension %s\n' % qq(self.extension_bytes).encode(),
    ]
    parts.extend(obj.zdump() for obj in self.objv)
    parts.append(b'\n')
    return b''.join(parts)
......@@ -483,7 +500,8 @@ class ObjectDelete(Object):
super(ObjectDelete, self).__init__(oid)
def zdump(self):
    # type: () -> bytes
    """Return the "obj <oid> delete" line of zodbdump format, as bytes."""
    return b'obj %s delete\n' % (ashex(self.oid).encode())
# ObjectCopy represents object data copy.
class ObjectCopy(Object):
......@@ -493,7 +511,8 @@ class ObjectCopy(Object):
self.copy_from = copy_from
def zdump(self):
    # type: () -> bytes
    """Return the "obj <oid> from <copy_from>" line of zodbdump format, as bytes."""
    return b'obj %s from %s\n' % (ashex(self.oid).encode(), ashex(self.copy_from).encode())
# ObjectData represents record with object data.
class ObjectData(Object):
......@@ -507,19 +526,20 @@ class ObjectData(Object):
self.hash_ = hash_
def zdump(self):
    # type: () -> bytes
    """Return the data record in zodbdump format, as bytes.

    The entry starts with "obj <oid> <size> <hashfunc>:<hash>". When
    `self.data` is a HashOnly instance, only " -" follows and the size is
    taken from `data.size`. Otherwise the data itself follows on the next
    line. The entry always ends with a newline.
    """
    data = self.data
    hashonly = isinstance(data, HashOnly)
    if hashonly:
        size = data.size
    else:
        size = len(data)
    z = b'obj %s %d %s:%s' % (ashex(self.oid).encode(), size, self.hashfunc.encode(), ashex(self.hash_).encode())
    if hashonly:
        z += b' -'
    else:
        z += b'\n'
        z += data
    z += b'\n'
    return z
# HashOnly indicated that this ObjectData record contains only hash and does not contain object data.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment