Commit ea534b05 authored by Julien Muchembled's avatar Julien Muchembled

protocol: switch to msgpack for packet serialization

Not only for performance reasons (the gain is significant in the case of
replication; tools/matrix is ~3% faster) but also because of several ugly
things in the way packets were defined:
- packet field names, which are only documentary; for root fields,
  they even just duplicate the packet names
- a lot of repetition in packet names, and even confusion between the name
  of the packet definition and the name of the actual notify/request packet
- the need to implement a new field type for almost anything, like PByte to
  support new compression formats, since PBoolean is not enough

neo/lib/protocol.py is now much smaller.
parent 38e98a12
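
To make the switch concrete, here is the shape of one packet definition before and after this commit, condensed from the protocol.py diff below:

    # before: a schema of typed fields per packet
    class ValidateTransaction(Packet):
        """
        Do replay a committed transaction that was not unlocked.

        :nodes: M -> S
        """
        _fmt = PStruct('validate_transaction',
            PTID('ttid'),
            PTID('tid'),
        )

    # after: a single registry call; arguments are plain Python values,
    # serialized by msgpack
    ValidateTransaction = notify("""
        Do replay a committed transaction that was not unlocked.

        :nodes: M -> S
        """)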
......@@ -80,7 +80,7 @@ class Application(ThreadedApplication):
self._cache = ClientCache() if cache_size is None else \
ClientCache(max_size=cache_size)
self._loading_oid = None
self.new_oid_list = ()
self.new_oids = ()
self.last_oid = '\0' * 8
self.storage_event_handler = storage.StorageEventHandler(self)
self.storage_bootstrap_handler = storage.StorageBootstrapHandler(self)
......@@ -187,7 +187,7 @@ class Application(ThreadedApplication):
with self._connecting_to_master_node:
result = self.master_conn
if result is None:
self.new_oid_list = ()
self.new_oids = ()
result = self.master_conn = self._connectToPrimaryNode()
return result
......@@ -312,15 +312,19 @@ class Application(ThreadedApplication):
"""Get a new OID."""
self._oid_lock_acquire()
try:
if not self.new_oid_list:
for oid in self.new_oids:
break
else:
# Get a new range of OIDs from the master node:
# we manage a pool of OIDs here to avoid asking
# the master for new OIDs one by one.
self._askPrimary(Packets.AskNewOIDs(100))
if not self.new_oid_list:
for oid in self.new_oids:
break
else:
raise NEOStorageError('new_oid failed')
self.last_oid = oid = self.new_oid_list.pop()
self.last_oid = oid
return oid
finally:
self._oid_lock_release()
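
The for/break/else construct above replaces the explicit list: it pulls at most one item from the new_oids iterator and falls through to the else clause only when the iterator is exhausted. A standalone sketch of the idiom (names are illustrative, not from the commit):

    def first_or_none(it):
        for x in it:
            break        # runs at most once: x is the first item
        else:
            return None  # iterator was already exhausted
        return x

    assert first_or_none(iter('ab')) == 'a'
    assert first_or_none(iter('')) is None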
......@@ -612,7 +616,7 @@ class Application(ThreadedApplication):
# user and description are cast to str in case they're unicode.
# BBB: This is not required anymore with recent ZODB.
packet = Packets.AskStoreTransaction(ttid, str(transaction.user),
str(transaction.description), ext, txn_context.cache_dict)
str(transaction.description), ext, list(txn_context.cache_dict))
queue = txn_context.queue
conn_dict = txn_context.conn_dict
# Ask in parallel all involved storage nodes to commit object metadata.
......@@ -697,7 +701,7 @@ class Application(ThreadedApplication):
else:
try:
notify(Packets.AbortTransaction(txn_context.ttid,
txn_context.conn_dict))
list(txn_context.conn_dict)))
  • FWIW, the Python coding style for multiline arguments is AFAIK either: all parameters to the right of the parenthesis, or all indented. Here the first argument follows the former, and the second follows the latter.

  • Let's relax rules about positioning arguments. There's nothing that's not ugly in some cases and I don't like modifying an extra line (i.e. breaking git-blame) for so little/questionable benefit. I'm also not fond of isolating each argument on its own line as it tends to explode the number of lines, and I see much less code on screen without scrolling.

except ConnectionClosed:
pass
# We don't need to flush queue, as it won't be reused by future
......@@ -736,7 +740,8 @@ class Application(ThreadedApplication):
for oid in checked_list:
del cache_dict[oid]
ttid = txn_context.ttid
p = Packets.AskFinishTransaction(ttid, cache_dict, checked_list)
p = Packets.AskFinishTransaction(ttid, list(cache_dict),
checked_list)
try:
tid = self._askPrimary(p, cache_dict=cache_dict, callback=f)
assert tid
......
......@@ -164,8 +164,7 @@ class PrimaryAnswersHandler(AnswerBaseHandler):
self.app.setHandlerData(ttid)
def answerNewOIDs(self, conn, oid_list):
oid_list.reverse()
self.app.new_oid_list = oid_list
self.app.new_oids = iter(oid_list)
def incompleteTransaction(self, conn, message):
raise NEOStorageError("storage nodes for which vote failed can not be"
......
......@@ -26,7 +26,7 @@ from .exception import NEOStorageError
class _WakeupPacket(object):
handler_method_name = 'pong'
decode = tuple
_args = ()
getId = int
class Transaction(object):
......
......@@ -16,12 +16,19 @@
from functools import wraps
from time import time
import msgpack
from msgpack.exceptions import UnpackValueError
from . import attributeTracker, logging
from .connector import ConnectorException, ConnectorDelayedConnection
from .locking import RLock
from .protocol import uuid_str, Errors, PacketMalformedError, Packets
from .util import dummy_read_buffer, ReadBuffer
from .protocol import uuid_str, Errors, PacketMalformedError, Packets, \
Unpacker
@apply
class dummy_read_buffer(msgpack.Unpacker):
def feed(self, _):
pass
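
A quick sketch of what this buys: once a connection is closed, read_buf is rebound to this shared instance (see the close() hunk below), so bytes that arrive late are silently dropped and iteration finds nothing to unpack:

    dummy_read_buffer.feed(b'\x01\x02\x03')       # discarded by the no-op feed()
    assert next(dummy_read_buffer, None) is None  # nothing was buffered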
class ConnectionClosed(Exception):
pass
......@@ -310,12 +317,12 @@ class Connection(BaseConnection):
client = False
server = False
peer_id = None
_parser_state = None
_total_unpacked = 0
_timeout = None
def __init__(self, event_manager, *args, **kw):
BaseConnection.__init__(self, event_manager, *args, **kw)
self.read_buf = ReadBuffer()
self.read_buf = Unpacker()
self.cur_id = 0
self.aborted = False
self.uuid = None
......@@ -425,42 +432,36 @@ class Connection(BaseConnection):
self._closure()
def _parse(self):
read = self.read_buf.read
version = read(4)
if version is None:
return
from .protocol import (ENCODED_VERSION, MAX_PACKET_SIZE,
PACKET_HEADER_FORMAT, Packets)
from .protocol import ENCODED_VERSION, Packets
read_buf = self.read_buf
version = read_buf.read_bytes(4)
if version != ENCODED_VERSION:
if len(version) < 4: # unlikely so tested last
# Not enough data and there's no API to know it in advance.
# Put it back.
read_buf.feed(version)
return
logging.warning('Protocol version mismatch with %r', self)
raise ConnectorException
header_size = PACKET_HEADER_FORMAT.size
unpack = PACKET_HEADER_FORMAT.unpack
read_next = read_buf.next
read_pos = read_buf.tell
def parse():
state = self._parser_state
if state is None:
header = read(header_size)
if header is None:
return
msg_id, msg_type, msg_len = unpack(header)
try:
packet_klass = Packets[msg_type]
except KeyError:
raise PacketMalformedError('Unknown packet type')
if msg_len > MAX_PACKET_SIZE:
  • Are we still able to enforce packet size with msgpack ? The intent of this check is to prevent remote-controlled memory explosion, and AFAICS each string exchanged may take 4GB as far as msgpack is concerned.

  • No (see added comment in neo.lib.protocol).

    prevent remote-controlled memory explosion

    Yes, if that's unintentional (bug). I mean that NEO was designed from the beginning as a single application that spans over several processes/hosts: each process fully trusts the others. Given the complexity of what we want to do, it looks impossible to open a NEO cluster to anyone. That probably even applies to all ZODB implementations because, for example, you couldn't prevent a malicious process from blocking all writes by not calling tpc_finish.

    From there, the check on packet size was only an assertion.

    It's ok to add checks when doable easily, but that should not block anything.

  • NEO was designed from the beginning as a single application

    Yes, and it's fine for me once the NEO protocol nature of both ends is established. But NEO must not die if an admin makes a typo in a port number and an unrelated program happens to speak the same next-layer generic protocol as NEO (so TCP, with or without TLS, and now msgpack).

    So NEO protocol identification packet, at the very least, should not be allowed to blow memory up.

    Maybe this packet is already correctly structured, I do not know. Enforcing systematic max packet size avoids having to check, and is a generally good idea for anything network-related (hence my hope that msgpack has such feature).

  • AFAICS each string exchanged may take 4GB as far as msgpack is concerned

    I already use msgpack with max_buffer_size=64MB, so each item can't exceed this size (cf. a comment in protocol.py). Which means that you can still reach 4GB by, for example, constructing a list of 1024 strings of 4MB. You can't cause a DoS by only changing a few bytes: the following data must be valid msgpack data for as long as you want the DoS to last.

    Yes, and it's fine for me once the NEO protocol nature of both ends is established.

    Currently, it's only the first 4 bytes, for the version.

    So NEO protocol identification packet, at the very least, should not be allowed to blow memory up.

    The RequestIdentification has never been mandatory at the connection level. neoctl does not use it when connecting to an admin node and that was a problem when the version was checked via this packet. So in theory, enforcing a limit for the size of this packet is not enough.

    To sum up, the only way I see to improve things is to have more than the 4-bytes version at the beginning: some NEO signature. But that does not look useful to me.

  • I already use msgpack with max_buffer_size=64MB, so each item can't exceed this size

    This is good.

    Which means that you can still reach 4GB by, for example, constructing a list of 1024 strings of 4MB.

    Does msgpack validate received data against a schema while parsing ? Like "I'm not supposed to receive a list at this place in this type of message".

    If it does, then if care is taken to not have a list of strings early in the protocol it's fine for me.

    If it doesn't, and the sender can emit whatever, then I think we need to treat the initial handshake specially, outside of msgpack, with strict & defensive parsing: it needs to send X bytes in a single network frame, not more, not less, and not any of the 99.99...% of values possible for these bytes. As you say below, we expect a version on 4 bytes; as long as we do not accept many thousands of possible versions as valid at any point, it should be fine. Adding a fixed prefix (ex: NEO\x00) would weed out other protocols which would use a similar pattern.

    You can't just cause a DoS by only changing a few bytes: the following data must be valid msgpack data and for how long you want to DoS.

    Again, I'm not talking about malicious DoS, but about "I crossed the streams!" accidents, which must not result in a trainwreck, database data loss, NEO process crash, etc.

    The RequestIdentification has never been mandatory at the connection level. neoctl does not use it when connecting to an admin node and that was a problem when the version was checked via this packet. So in theory, enforcing a limit for the size of this packet is not enough.

    Is it ? I seem to remember a time where connections received a minimal handler on establishment until identification happened, and only then switched to the full handler. This was IIRC needed at least on master and storage nodes as more than one type of node connect, and they need to be distinguished so the proper handler is chosen. Am I making memories up ?

    To sum up, the only way I see to improve things is to have more than the 4-bytes version at the beginning: some NEO signature.

    I agree.

    But that does not look useful to me.

    I disagree.

    Also, it should be rather easy to implement: I believe that now that SSL support is implemented the connection is being polled until SSL handshake is finished. This should be possible to extend to also cover emission & reception of such mandatory pair of packets with minimal code footprint, and a half-RTT of latency (which is as low as it gets) added on connection establishment (which should be a rare event, so pointless to micro-optimise for).

  • As you say below, we expect a version on 4 bytes, then as long as we do not accept many thousands of possible versions as valid at any point, then it should be fine.

    There's no plan to accept other values than the one defined in the code.

    You can't just cause a DoS by only changing a few bytes: the following data must be valid msgpack data and for how long you want to DoS.

    Again, I'm not talking about malicious DoS, but about "I crossed the streams !" accidents must not result in a trainwreck, database data loss, NEO process crash, etc.

    I wasn't talking about malicious DoS.

    Does msgpack validate received data against a schema while parsing ?

    It doesn't.

    Is it ? I seem to remember a time where connections received a minimal handler on establishment until identification happened, and only then switched to the full handler. This was IIRC needed at least on master and storage nodes as more than one type of node connect, and they need to be distinguished so the proper handler is chosen. Am I making memories up ?

    See a60e36e8, also the fact that there's no node type for neoctl (to fill the first field of RequestIdentification).

    I'm not saying a RequestIdentification is not mandatory for nodes that have a node type defined. What I wrote is that it's not mandatory at the connection level (neo.lib.connection.py).
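
For concreteness, a small sketch of how msgpack's max_buffer_size bounds what a peer can make us buffer (a toy 1 KiB cap here; NEO uses 64 MiB):

    import msgpack
    from msgpack.exceptions import BufferFull

    unpacker = msgpack.Unpacker(use_list=False, max_buffer_size=1024)
    unpacker.feed(msgpack.packb([1, 2, 3]))  # small item: accepted
    assert next(unpacker) == (1, 2, 3)
    try:
        unpacker.feed(b'\x00' * 2048)        # more than the cap in one feed
    except BufferFull:
        pass  # rejected before any large allocation happens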

raise PacketMalformedError('message too big (%d)' % msg_len)
else:
msg_id, packet_klass, msg_len = state
data = read(msg_len)
if data is None:
# Not enough.
if state is None:
self._parser_state = msg_id, packet_klass, msg_len
else:
self._parser_state = None
packet = packet_klass()
packet.setContent(msg_id, data)
return packet
try:
msg_id, msg_type, args = read_next()
except StopIteration:
return
except UnpackValueError as e:
raise PacketMalformedError(str(e))
try:
packet_klass = Packets[msg_type]
except KeyError:
raise PacketMalformedError('Unknown packet type')
pos = read_pos()
packet = packet_klass(*args)
packet.setId(msg_id)
packet.size = pos - self._total_unpacked
self._total_unpacked = pos
return packet
self._parse = parse
return parse()
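
The streaming pattern the rewritten _parse relies on, as a standalone sketch: feed() buffers raw network bytes, next() returns one complete msgpack item per call and raises StopIteration while the item is still incomplete, and tell() gives the consumed offset used above to compute packet.size. The (msg_id, msg_type, args) triple matches Packet.encode in protocol.py:

    import msgpack

    u = msgpack.Unpacker(use_list=False)
    frame = msgpack.packb((1, 3, ('arg1', 'arg2')))
    u.feed(frame[:3])                 # partial data from the network
    try:
        next(u)                       # not a whole item yet
    except StopIteration:
        pass
    u.feed(frame[3:])                 # the rest arrives
    msg_id, msg_type, args = next(u)  # one complete triple
    assert u.tell() == len(frame)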
......@@ -513,7 +514,7 @@ class Connection(BaseConnection):
def close(self):
if self.connector is None:
assert self._on_close is None
assert not self.read_buf
assert not self.read_buf.read_bytes(1)
assert not self.isPending()
return
# process the network events with the last registered handler to
......@@ -524,7 +525,7 @@ class Connection(BaseConnection):
if self._on_close is not None:
self._on_close()
self._on_close = None
self.read_buf.clear()
self.read_buf = dummy_read_buffer
try:
if self.connecting:
handler.connectionFailed(self)
......
......@@ -80,9 +80,8 @@ class SocketConnector(object):
def queue(self, data):
was_empty = not self.queued
self.queued += data
for data in data:
self.queue_size += len(data)
self.queued.append(data)
self.queue_size += len(data)
return was_empty
def _error(self, op, exc=None):
......@@ -172,7 +171,7 @@ class SocketConnector(object):
except socket.error, e:
self._error('recv', e)
if data:
read_buf.append(data)
read_buf.feed(data)
return
self._error('recv')
......@@ -278,7 +277,7 @@ class _SSL:
def receive(self, read_buf):
try:
while 1:
read_buf.append(self.socket.recv(4096))
read_buf.feed(self.socket.recv(4096))
except ssl.SSLWantReadError:
pass
except socket.error, e:
......
......@@ -23,7 +23,7 @@ NOBODY = []
class _ConnectionClosed(object):
handler_method_name = 'connectionClosed'
decode = tuple
_args = ()
class getId(object):
def __eq__(self, other):
......
......@@ -68,7 +68,7 @@ class EventHandler(object):
method = getattr(self, packet.handler_method_name)
except AttributeError:
raise UnexpectedPacketError('no handler found')
args = packet.decode() or ()
args = packet._args
method(conn, *args, **kw)
except DelayEvent, e:
assert not kw, kw
......@@ -76,9 +76,6 @@ class EventHandler(object):
except UnexpectedPacketError, e:
if not conn.isClosed():
self.__unexpectedPacket(conn, packet, *e.args)
except PacketMalformedError, e:
logging.error('malformed packet from %r: %s', conn, e)
conn.close()
except NotReadyError, message:
if not conn.isClosed():
if not message.args:
......
......@@ -152,7 +152,8 @@ class NEOLogger(Logger):
def _setup(self, filename=None, reset=False):
from . import protocol as p
global uuid_str
global packb, uuid_str
packb = p.packb
uuid_str = p.uuid_str
if self._db is not None:
self._db.close()
......@@ -250,7 +251,7 @@ class NEOLogger(Logger):
'>' if r.outgoing else '<', uuid_str(r.uuid), ip, port)
msg = r.msg
if msg is not None:
msg = buffer(msg)
msg = buffer(msg if type(msg) is bytes else packb(msg))
q = "INSERT INTO packet VALUES (?,?,?,?,?,?)"
x = [r.created, nid, r.msg_id, r.code, peer, msg]
else:
......@@ -299,9 +300,14 @@ class NEOLogger(Logger):
def packet(self, connection, packet, outgoing):
if self._db is not None:
body = packet._body
if self._max_packet and self._max_packet < len(body):
body = None
if self._max_packet and self._max_packet < packet.size:
args = None
else:
args = packet._args
try:
hash(args)
except TypeError:
args = packb(args)
self._queue(PacketRecord(
created=time(),
msg_id=packet._id,
......@@ -309,7 +315,7 @@ class NEOLogger(Logger):
outgoing=outgoing,
uuid=connection.getUUID(),
addr=connection.getAddress(),
msg=body))
msg=args))
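
A standalone illustration of the hashability probe above (the helper name is mine): fully immutable argument tuples are recorded as-is, while anything containing a list or dict is serialized first so the queued record stays immutable:

    def record_args(args, packb=packb):
        try:
            hash(args)
            return args         # immutable: store as-is
        except TypeError:
            return packb(args)  # contains list/dict: store serialized

    assert record_args((1, 'a')) == (1, 'a')
    assert isinstance(record_args((1, [2, 3])), bytes)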
def node(self, *cluster_nid):
name = self.name and str(self.name)
......
......@@ -14,27 +14,58 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import sys
import traceback
from cStringIO import StringIO
from struct import Struct
import struct, threading
from functools import partial
# The protocol version must be increased whenever upgrading a node may require
# to upgrade other nodes. It is encoded as a 4-bytes big-endian integer and
# the high order byte 0 is different from TLS Handshake (0x16).
PROTOCOL_VERSION = 5
ENCODED_VERSION = Struct('!L').pack(PROTOCOL_VERSION)
PROTOCOL_VERSION = 6
ENCODED_VERSION = struct.pack('!L', PROTOCOL_VERSION)
# Avoid memory errors on corrupted data.
MAX_PACKET_SIZE = 0x4000000
PACKET_HEADER_FORMAT = Struct('!LHL')
RESPONSE_MASK = 0x8000
# Avoid some memory errors on corrupted data.
# Before we used msgpack, we limited the size of a whole packet. That's not
# possible anymore because the size is not known in advance. Packets bigger
# than the buffer size are possible (e.g. a huge list of small items) and for
# that we could compare the stream position (Unpacker.tell); it's not worth it.
UNPACK_BUFFER_SIZE = 0x4000000
@apply
def Unpacker():
global registerExtType, packb
from msgpack import ExtType, unpackb, Packer, Unpacker
ext_type_dict = []
kw = dict(use_bin_type=True)
pack_ext = Packer(**kw).pack
def registerExtType(getstate, make):
code = len(ext_type_dict)
ext_type_dict.append(lambda data: make(unpackb(data, use_list=False)))
return lambda obj: ExtType(code, pack_ext(getstate(obj)))
iterable_types = set, tuple
def default(obj):
try:
pack = obj._pack
except AttributeError:
assert type(obj) in iterable_types, type(obj)
return list(obj)
return pack()
lock = threading.Lock()
pack = Packer(default, strict_types=True, **kw).pack
def packb(obj):
with lock: # in case that 'default' is called
return pack(obj)
return partial(Unpacker, use_list=False, max_buffer_size=UNPACK_BUFFER_SIZE,
ext_hook=lambda code, data: ext_type_dict[code](data))
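
A self-contained sketch of the ExtType round-trip that registerExtType wires up, reduced to a single type with code 0 (the class and names are illustrative, not from the commit):

    import msgpack
    from msgpack import ExtType

    pack_ext = msgpack.Packer(use_bin_type=True).pack

    class Point(object):
        def __init__(self, x, y):
            self.x, self.y = x, y
        def _pack(self):  # what registerExtType(getstate, make) produces
            return ExtType(0, pack_ext((self.x, self.y)))

    blob = msgpack.packb(Point(1, 2), default=lambda obj: obj._pack())
    p = msgpack.unpackb(blob, use_list=False,
                        ext_hook=lambda code, data:
                            Point(*msgpack.unpackb(data, use_list=False)))
    assert (p.x, p.y) == (1, 2)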
class Enum(tuple):
class Item(int):
__slots__ = '_name', '_enum'
__slots__ = '_name', '_enum', '_pack'
def __str__(self):
return self._name
def __repr__(self):
......@@ -49,30 +80,38 @@ class Enum(tuple):
names = func.func_code.co_names
self = tuple.__new__(cls, map(cls.Item, xrange(len(names))))
self._name = func.__name__
pack = registerExtType(int, self.__getitem__)
for item, name in zip(self, names):
setattr(self, name, item)
item._name = name
item._enum = self
item._pack = (lambda x: lambda: x)(pack(item))
return self
def __repr__(self):
return "<Enum %s>" % self._name
# The order of extension type is important.
# Enum types first, sorted alphabetically.
@Enum
def ErrorCodes():
ACK
NOT_READY
OID_NOT_FOUND
TID_NOT_FOUND
OID_DOES_NOT_EXIST
PROTOCOL_ERROR
REPLICATION_ERROR
CHECKING_ERROR
BACKEND_NOT_IMPLEMENTED
NON_READABLE_CELL
READ_ONLY_ACCESS
INCOMPLETE_TRANSACTION
def CellStates():
# Write-only cell. Last transactions are missing because storage is/was down
# for a while, or because it is new for the partition. It usually becomes
# UP_TO_DATE when replication is done.
OUT_OF_DATE
# Normal state: cell is writable/readable, and it isn't planned to drop it.
UP_TO_DATE
# Same as UP_TO_DATE, except that it will be discarded as soon as another
# node finishes to replicate it. It means a partition is moved from 1 node
# to another. It is also discarded immediately if out-of-date.
FEEDING
# A check revealed that data differs from other replicas. Cell is neither
# readable nor writable.
CORRUPTED
# Not really a state: only used in network packets to tell storages to drop
# partitions.
DISCARDED
@Enum
def ClusterStates():
......@@ -107,11 +146,19 @@ def ClusterStates():
STOPPING_BACKUP
@Enum
def NodeTypes():
MASTER
STORAGE
CLIENT
ADMIN
def ErrorCodes():
ACK
NOT_READY
OID_NOT_FOUND
TID_NOT_FOUND
OID_DOES_NOT_EXIST
PROTOCOL_ERROR
REPLICATION_ERROR
CHECKING_ERROR
BACKEND_NOT_IMPLEMENTED
NON_READABLE_CELL
READ_ONLY_ACCESS
INCOMPLETE_TRANSACTION
@Enum
def NodeStates():
......@@ -121,23 +168,11 @@ def NodeStates():
PENDING
@Enum
def CellStates():
# Write-only cell. Last transactions are missing because storage is/was down
# for a while, or because it is new for the partition. It usually becomes
# UP_TO_DATE when replication is done.
OUT_OF_DATE
# Normal state: cell is writable/readable, and it isn't planned to drop it.
UP_TO_DATE
# Same as UP_TO_DATE, except that it will be discarded as soon as another
# node finishes to replicate it. It means a partition is moved from 1 node
# to another. It is also discarded immediately if out-of-date.
FEEDING
# A check revealed that data differs from other replicas. Cell is neither
# readable nor writable.
CORRUPTED
# Not really a state: only used in network packets to tell storages to drop
# partitions.
DISCARDED
def NodeTypes():
MASTER
STORAGE
CLIENT
ADMIN
# used for logging
node_state_prefix_dict = {
......@@ -212,45 +247,24 @@ class NonReadableCell(Exception):
On such event, the client must retry, preferably another cell.
"""
class Packet(object):
"""
Base class for any packet definition. The _fmt class attribute must be
defined for any non-empty packet.
Base class for any packet definition.
"""
_ignore_when_closed = False
_request = None
_answer = None
_body = None
_code = None
_fmt = None
_id = None
allow_dict = False
nodelay = True
poll_thread = False
def __init__(self, *args):
assert self._code is not None, "Packet class not registered"
if args:
buf = StringIO()
self._fmt.encode(buf.write, args)
self._body = buf.getvalue()
else:
self._body = ''
def decode(self):
assert self._body is not None
if self._fmt is None:
return ()
buf = StringIO(self._body)
try:
return self._fmt.decode(buf.read)
except ParseError, msg:
name = self.__class__.__name__
raise PacketMalformedError("%s fail (%s)" % (name, msg))
def setContent(self, msg_id, body):
""" Register the packet content for future decoding """
self._id = msg_id
self._body = body
assert self.allow_dict or dict not in map(type, args), args
self._args = args
def setId(self, value):
self._id = value
......@@ -259,14 +273,11 @@ class Packet(object):
assert self._id is not None, "No identifier applied on the packet"
return self._id
def encode(self):
def encode(self, packb=packb):
""" Encode a packet as a string to send it over the network """
content = self._body
return (PACKET_HEADER_FORMAT.pack(self._id, self._code, len(content)),
content)
def __len__(self):
return PACKET_HEADER_FORMAT.size + len(self._body)
r = packb((self._id, self._code, self._args))
self.size = len(r)
return r
def __repr__(self):
return '%s[%r]' % (self.__class__.__name__, self._id)
......@@ -279,10 +290,10 @@ class Packet(object):
return self._code == other._code
def isError(self):
return isinstance(self, Error)
return self._code == RESPONSE_MASK
def isResponse(self):
return self._code & RESPONSE_MASK == RESPONSE_MASK
return self._code & RESPONSE_MASK
def getAnswerClass(self):
return self._answer
......@@ -294,1530 +305,524 @@ class Packet(object):
"""
return self._ignore_when_closed
class ParseError(Exception):
"""
An exception that encapsulate another and build the 'path' of the
packet item that generate the error.
"""
def __init__(self, item, trace):
Exception.__init__(self)
self._trace = trace
self._items = [item]
def append(self, item):
self._items.append(item)
def __repr__(self):
chain = '/'.join([item.getName() for item in reversed(self._items)])
return 'at %s:\n%s' % (chain, self._trace)
__str__ = __repr__
# packet parsers
class PItem(object):
"""
Base class for any packet item, _encode and _decode must be overridden
by subclasses.
"""
def __init__(self, name):
self._name = name
def __repr__(self):
return self.__class__.__name__
def getName(self):
return self._name
def _trace(self, method, *args):
try:
return method(*args)
except ParseError, e:
# trace and forward exception
e.append(self)
raise
except Exception:
# original exception, encapsulate it
trace = ''.join(traceback.format_exception(*sys.exc_info())[2:])
raise ParseError(self, trace)
def encode(self, writer, items):
return self._trace(self._encode, writer, items)
def decode(self, reader):
return self._trace(self._decode, reader)
def _encode(self, writer, items):
raise NotImplementedError, self.__class__.__name__
class PacketRegistryFactory(dict):
def _decode(self, reader):
raise NotImplementedError, self.__class__.__name__
class PStruct(PItem):
"""
Aggregate other items
"""
def __init__(self, name, *items):
PItem.__init__(self, name)
self._items = items
def _encode(self, writer, items):
assert len(self._items) == len(items), (items, self._items)
for item, value in zip(self._items, items):
item.encode(writer, value)
def _decode(self, reader):
return tuple([item.decode(reader) for item in self._items])
class PStructItem(PItem):
"""
A single value encoded with struct
"""
def __init__(self, name):
PItem.__init__(self, name)
struct = Struct(self._fmt)
self.pack = struct.pack
self.unpack = struct.unpack
self.size = struct.size
def _encode(self, writer, value):
writer(self.pack(value))
def _decode(self, reader):
return self.unpack(reader(self.size))[0]
class PStructItemOrNone(PStructItem):
def _encode(self, writer, value):
return writer(self._None if value is None else self.pack(value))
def _decode(self, reader):
value = reader(self.size)
return None if value == self._None else self.unpack(value)[0]
class POption(PStruct):
def _encode(self, writer, value):
if value is None:
writer('\0')
def __call__(self, name, base, d):
for k, v in d.items():
if isinstance(v, type) and issubclass(v, Packet):
v.__name__ = k
v.handler_method_name = k[0].lower() + k[1:]
# this builds a "singleton"
return type('PacketRegistry', base, d)(self)
def register(self, doc, ignore_when_closed=None, request=False, error=False,
_base=(Packet,), **kw):
""" Register a packet in the packet registry """
code = len(self)
if doc is None:
self[code] = None
return # None registered only to skip a code number (for compatibility)
if error and not request:
assert not code
code = RESPONSE_MASK
kw.update(__doc__=doc, _code=code)
packet = type('', _base, kw)
# register the request
self[code] = packet
if request:
if ignore_when_closed is None:
# By default, on a closed connection:
# - request: ignore
# - answer: keep
# - notification: keep
packet._ignore_when_closed = True
else:
assert ignore_when_closed is False
if error:
packet._answer = self[RESPONSE_MASK]
else:
# build a class for the answer
code |= RESPONSE_MASK
kw['_code'] = code
answer = packet._answer = self[code] = type('', _base, kw)
return packet, answer
else:
writer('\1')
PStruct._encode(self, writer, value)
assert ignore_when_closed is None
return packet
def _decode(self, reader):
if '\0\1'.index(reader(1)):
return PStruct._decode(self, reader)
class PList(PStructItem):
class Packets(dict):
"""
A list of homogeneous items
Packet registry that checks packet code uniqueness and provides an index
"""
_fmt = '!L'
__metaclass__ = PacketRegistryFactory()
notify = __metaclass__.register
request = partial(notify, request=True)
def __init__(self, name, item):
PStructItem.__init__(self, name)
self._item = item
Error = notify("""
Error is a special type of message, because this can be sent against
any other message, even if such a message does not expect a reply
usually.
def _encode(self, writer, items):
writer(self.pack(len(items)))
item = self._item
for value in items:
item.encode(writer, value)
:nodes: * -> *
""", error=True)
def _decode(self, reader):
length = self.unpack(reader(self.size))[0]
item = self._item
return [item.decode(reader) for _ in xrange(length)]
RequestIdentification, AcceptIdentification = request("""
Request a node identification. This must be the first packet for any
connection.
class PDict(PStructItem):
"""
A dictionary with custom key and value formats
"""
_fmt = '!L'
def __init__(self, name, key, value):
PStructItem.__init__(self, name)
self._key = key
self._value = value
def _encode(self, writer, item):
assert isinstance(item , dict), (type(item), item)
writer(self.pack(len(item)))
key, value = self._key, self._value
for k, v in item.iteritems():
key.encode(writer, k)
value.encode(writer, v)
def _decode(self, reader):
length = self.unpack(reader(self.size))[0]
key, value = self._key, self._value
new_dict = {}
for _ in xrange(length):
k = key.decode(reader)
v = value.decode(reader)
new_dict[k] = v
return new_dict
class PEnum(PStructItem):
"""
Encapsulate an enumeration value
"""
_fmt = 'b'
:nodes: * -> *
""", poll_thread=True)
def __init__(self, name, enum):
PStructItem.__init__(self, name)
self._enum = enum
Ping, Pong = request("""
Empty request used as network barrier.
def _encode(self, writer, item):
if item is None:
item = -1
writer(self.pack(item))
:nodes: * -> *
""")
def _decode(self, reader):
code = self.unpack(reader(self.size))[0]
if code == -1:
return None
try:
return self._enum[code]
except KeyError:
enum = self._enum.__class__.__name__
raise ValueError, 'Invalid code for %s enum: %r' % (enum, code)
CloseClient = notify("""
Tell peer that it can close the connection if it has finished with us.
class PString(PStructItem):
"""
A variable-length string
"""
_fmt = '!L'
:nodes: * -> *
""")
def _encode(self, writer, value):
writer(self.pack(len(value)))
writer(value)
AskPrimary, AnswerPrimary = request("""
Ask node identifier of the current primary master.
def _decode(self, reader):
length = self.unpack(reader(self.size))[0]
return reader(length)
:nodes: ctl -> A
""")
class PAddress(PString):
"""
An host address (IPv4/IPv6)
"""
NotPrimaryMaster = notify("""
Notify peer that I'm not the primary master. Attach any extra
information to help the peer joining the cluster.
def __init__(self, name):
PString.__init__(self, name)
self._port = Struct('!H')
:nodes: SM -> *
""")
def _encode(self, writer, address):
if address:
host, port = address
PString._encode(self, writer, host)
writer(self._port.pack(port))
else:
PString._encode(self, writer, '')
NotifyNodeInformation = notify("""
Notify information about one or more nodes.
def _decode(self, reader):
host = PString._decode(self, reader)
if host:
p = self._port
return host, p.unpack(reader(p.size))[0]
:nodes: M -> *
""")
class PBoolean(PStructItem):
"""
A boolean value, encoded as a single byte
"""
_fmt = '!?'
AskRecovery, AnswerRecovery = request("""
Ask storage nodes data needed by master to recover.
Reused by `neoctl print ids`.
class PNumber(PStructItem):
"""
A integer number (4-bytes length)
"""
_fmt = '!L'
:nodes: M -> S; ctl -> A -> M
""")
class PIndex(PStructItem):
"""
A big integer to defined indexes in a huge list.
"""
_fmt = '!Q'
AskLastIDs, AnswerLastIDs = request("""
Ask the last OID/TID so that a master can initialize its
TransactionManager. Reused by `neoctl print ids`.
class PPTID(PStructItemOrNone):
"""
A None value means an invalid PTID
"""
_fmt = '!Q'
_None = Struct(_fmt).pack(0)
:nodes: M -> S; ctl -> A -> M
""")
class PChecksum(PItem):
"""
A hash (SHA1)
"""
def _encode(self, writer, checksum):
assert len(checksum) == 20, (len(checksum), checksum)
writer(checksum)
AskPartitionTable, AnswerPartitionTable = request("""
Ask storage node the remaining data needed by master to recover.
This is also how the clients get the full partition table on connection.
def _decode(self, reader):
return reader(20)
:nodes: M -> S; C -> M
""")
class PSignedNull(PStructItemOrNone):
_fmt = '!l'
_None = Struct(_fmt).pack(0)
SendPartitionTable = notify("""
Send the full partition table to admin/storage nodes on connection.
class PUUID(PSignedNull):
"""
An UUID (node identifier, 4-bytes signed integer)
"""
:nodes: M -> A, S
""")
class PTID(PItem):
"""
A transaction identifier
"""
def _encode(self, writer, tid):
if tid is None:
tid = INVALID_TID
assert len(tid) == 8, (len(tid), tid)
writer(tid)
def _decode(self, reader):
tid = reader(8)
if tid == INVALID_TID:
tid = None
return tid
# same definition, for now
POID = PTID
class PFloat(PStructItemOrNone):
"""
A float number (8-bytes length)
"""
_fmt = '!d'
_None = '\xff' * 8
# common definitions
PFEmpty = PStruct('no_content')
PFNodeType = PEnum('type', NodeTypes)
PFNodeState = PEnum('state', NodeStates)
PFCellState = PEnum('state', CellStates)
PFNodeList = PList('node_list',
PStruct('node',
PFNodeType,
PAddress('address'),
PUUID('uuid'),
PFNodeState,
PFloat('id_timestamp'),
),
)
PFCellList = PList('cell_list',
PStruct('cell',
PUUID('uuid'),
PFCellState,
),
)
PFRowList = PList('row_list',
PStruct('row',
PNumber('offset'),
PFCellList,
),
)
PFHistoryList = PList('history_list',
PStruct('history_entry',
PTID('serial'),
PNumber('size'),
),
)
PFUUIDList = PList('uuid_list',
PUUID('uuid'),
)
PFTidList = PList('tid_list',
PTID('tid'),
)
PFOidList = PList('oid_list',
POID('oid'),
)
# packets definition
class Error(Packet):
"""
Error is a special type of message, because this can be sent against
any other message, even if such a message does not expect a reply
usually.
NotifyPartitionChanges = notify("""
Notify about changes in the partition table.
:nodes: * -> *
"""
_fmt = PStruct('error',
PNumber('code'),
PString('message'),
)
:nodes: M -> *
""")
class Ping(Packet):
"""
Empty request used as network barrier.
StartOperation = notify("""
Tell a storage node to start operation. Before this message,
it must only communicate with the primary master.
:nodes: * -> *
"""
_answer = PFEmpty
:nodes: M -> S
""")
class CloseClient(Packet):
"""
Tell peer that it can close the connection if it has finished with us.
StopOperation = notify("""
Notify that the cluster is not operational anymore.
Any operation between nodes must be aborted.
:nodes: * -> *
"""
:nodes: M -> S, C
""")
class RequestIdentification(Packet):
"""
Request a node identification. This must be the first packet for any
connection.
AskUnfinishedTransactions, AnswerUnfinishedTransactions = request("""
Ask unfinished transactions, which will be replicated
when they're finished.
:nodes: * -> *
"""
poll_thread = True
_fmt = PStruct('request_identification',
PFNodeType,
PUUID('uuid'),
PAddress('address'),
PString('name'),
PList('devpath', PString('devid')),
PFloat('id_timestamp'),
)
_answer = PStruct('accept_identification',
PFNodeType,
PUUID('my_uuid'),
PNumber('num_partitions'),
PNumber('num_replicas'),
PUUID('your_uuid'),
)
class PrimaryMaster(Packet):
"""
Ask node identifier of the current primary master.
:nodes: S -> M
""")
:nodes: ctl -> A
"""
_answer = PStruct('answer_primary',
PUUID('primary_uuid'),
)
AskLockedTransactions, AnswerLockedTransactions = request("""
Ask locked transactions to replay committed transactions
that haven't been unlocked.
class NotPrimaryMaster(Packet):
"""
Notify peer that I'm not the primary master. Attach any extra information
to help the peer joining the cluster.
:nodes: M -> S
""", allow_dict=True)
:nodes: SM -> *
"""
_fmt = PStruct('not_primary_master',
PSignedNull('primary'),
PList('known_master_list',
PAddress('address'),
),
)
class Recovery(Packet):
"""
Ask storage nodes data needed by master to recover.
Reused by `neoctl print ids`.
AskFinalTID, AnswerFinalTID = request("""
Return final tid if ttid has been committed, to recover from certain
failures during tpc_finish.
:nodes: M -> S; ctl -> A -> M
"""
_answer = PStruct('answer_recovery',
PPTID('ptid'),
PTID('backup_tid'),
PTID('truncate_tid'),
)
:nodes: M -> S; C -> M, S
""")
class LastIDs(Packet):
"""
Ask the last OID/TID so that a master can initialize its TransactionManager.
Reused by `neoctl print ids`.
ValidateTransaction = notify("""
Do replay a committed transaction that was not unlocked.
:nodes: M -> S; ctl -> A -> M
"""
_answer = PStruct('answer_last_ids',
POID('last_oid'),
PTID('last_tid'),
)
:nodes: M -> S
""")
class PartitionTable(Packet):
"""
Ask storage node the remaining data needed by master to recover.
This is also how the clients get the full partition table on connection.
AskBeginTransaction, AnswerBeginTransaction = request("""
Ask to begin a new transaction. This maps to `tpc_begin`.
:nodes: M -> S; C -> M
"""
_answer = PStruct('answer_partition_table',
PPTID('ptid'),
PFRowList,
)
:nodes: C -> M
""")
class NotifyPartitionTable(Packet):
"""
Send the full partition table to admin/storage nodes on connection.
FailedVote = request("""
Report storage nodes for which vote failed.
True is returned if it's still possible to finish the transaction.
:nodes: M -> A, S
"""
_fmt = PStruct('send_partition_table',
PPTID('ptid'),
PFRowList,
)
:nodes: C -> M
""", error=True)
class PartitionChanges(Packet):
"""
Notify about changes in the partition table.
AskFinishTransaction, AnswerTransactionFinished = request("""
Finish a transaction. Return the TID of the committed transaction.
This maps to `tpc_finish`.
:nodes: M -> *
"""
_fmt = PStruct('notify_partition_changes',
PPTID('ptid'),
PList('cell_list',
PStruct('cell',
PNumber('offset'),
PUUID('uuid'),
PFCellState,
),
),
)
class StartOperation(Packet):
"""
Tell a storage node to start operation. Before this message, it must only
communicate with the primary master.
:nodes: C -> M
""", ignore_when_closed=False, poll_thread=True)
:nodes: M -> S
"""
_fmt = PStruct('start_operation',
# XXX: Is this boolean needed ? Maybe this
# can be deduced from cluster state.
PBoolean('backup'),
)
AskLockInformation, AnswerInformationLocked = request("""
Commit a transaction. The new data is read-locked.
class StopOperation(Packet):
"""
Notify that the cluster is not operational anymore. Any operation between
nodes must be aborted.
:nodes: M -> S
""", ignore_when_closed=False)
:nodes: M -> S, C
"""
InvalidateObjects = notify("""
Notify about a new transaction modifying objects,
invalidating client caches.
class UnfinishedTransactions(Packet):
"""
Ask unfinished transactions, which will be replicated when they're finished.
:nodes: M -> C
""")
:nodes: S -> M
"""
_fmt = PStruct('ask_unfinished_transactions',
PList('row_list',
PNumber('offset'),
),
)
_answer = PStruct('answer_unfinished_transactions',
PTID('max_tid'),
PList('tid_list',
PTID('unfinished_tid'),
),
)
class LockedTransactions(Packet):
"""
Ask locked transactions to replay committed transactions that haven't been
unlocked.
NotifyUnlockInformation = notify("""
Notify about a successfully committed transaction. The new data can be
unlocked.
:nodes: M -> S
"""
_answer = PStruct('answer_locked_transactions',
PDict('tid_dict',
PTID('ttid'),
PTID('tid'),
),
)
class FinalTID(Packet):
"""
Return final tid if ttid has been committed, to recover from certain
failures during tpc_finish.
:nodes: M -> S
""")
:nodes: M -> S; C -> M, S
"""
_fmt = PStruct('final_tid',
PTID('ttid'),
)
AskNewOIDs, AnswerNewOIDs = request("""
Ask new OIDs to create objects.
_answer = PStruct('final_tid',
PTID('tid'),
)
:nodes: C -> M
""")
class ValidateTransaction(Packet):
"""
Do replay a committed transaction that was not unlocked.
NotifyDeadlock = notify("""
Ask master to generate a new TTID that will be used by the client to
solve a deadlock by rebasing the transaction on top of concurrent
changes.
:nodes: M -> S
"""
_fmt = PStruct('validate_transaction',
PTID('ttid'),
PTID('tid'),
)
:nodes: S -> M -> C
""")
class BeginTransaction(Packet):
"""
Ask to begin a new transaction. This maps to `tpc_begin`.
AskRebaseTransaction, AnswerRebaseTransaction = request("""
Rebase a transaction to solve a deadlock.
:nodes: C -> M
"""
_fmt = PStruct('ask_begin_transaction',
PTID('tid'),
)
:nodes: C -> S
""")
_answer = PStruct('answer_begin_transaction',
PTID('tid'),
)
AskRebaseObject, AnswerRebaseObject = request("""
Rebase an object change to solve a deadlock.
class FailedVote(Packet):
"""
Report storage nodes for which vote failed.
True is returned if it's still possible to finish the transaction.
:nodes: C -> S
:nodes: C -> M
"""
_fmt = PStruct('failed_vote',
PTID('tid'),
PFUUIDList,
)
XXX: It is a request packet to simplify the implementation. For more
efficiency, this should be turned into a notification, and the
RebaseTransaction should be answered once all objects are rebased
(so that the client can still wait on something).
""", data_path=(1, 0, 2, 0))
_answer = Error
AskStoreObject, AnswerStoreObject = request("""
Ask to create/modify an object. This maps to `store`.
class FinishTransaction(Packet):
"""
Finish a transaction. Return the TID of the committed transaction.
This maps to `tpc_finish`.
As for IStorage, 'serial' is ZERO_TID for new objects.
:nodes: C -> M
"""
poll_thread = True
_fmt = PStruct('ask_finish_transaction',
PTID('tid'),
PFOidList,
PList('checked_list',
POID('oid'),
),
)
_answer = PStruct('answer_information_locked',
PTID('ttid'),
PTID('tid'),
)
class NotifyTransactionFinished(Packet):
"""
Notify that a transaction blocking a replication is now finished.
:nodes: C -> S
""", data_path=(0, 2))
:nodes: M -> S
"""
_fmt = PStruct('notify_transaction_finished',
PTID('ttid'),
PTID('max_tid'),
)
AbortTransaction = notify("""
Abort a transaction. This maps to `tpc_abort`.
class LockInformation(Packet):
"""
Commit a transaction. The new data is read-locked.
:nodes: C -> S; C -> M -> S
""")
:nodes: M -> S
"""
_fmt = PStruct('ask_lock_informations',
PTID('ttid'),
PTID('tid'),
)
AskStoreTransaction, AnswerStoreTransaction = request("""
Ask to store a transaction. Implies vote.
_answer = PStruct('answer_information_locked',
PTID('ttid'),
)
:nodes: C -> S
""")
class InvalidateObjects(Packet):
"""
Notify about a new transaction modifying objects,
invalidating client caches.
AskVoteTransaction, AnswerVoteTransaction = request("""
Ask to vote a transaction.
:nodes: M -> C
"""
_fmt = PStruct('ask_finish_transaction',
PTID('tid'),
PFOidList,
)
:nodes: C -> S
""")
class UnlockInformation(Packet):
"""
Notify about a successfully committed transaction. The new data can be
unlocked.
AskObject, AnswerObject = request("""
Ask a stored object by its OID, optionally at/before a specific tid.
This maps to `load/loadBefore/loadSerial`.
:nodes: M -> S
"""
_fmt = PStruct('notify_unlock_information',
PTID('ttid'),
)
:nodes: C -> S
""", data_path=(1, 3))
class GenerateOIDs(Packet):
"""
Ask new OIDs to create objects.
AskTIDs, AnswerTIDs = request("""
Ask for TIDs between a range of offsets. The order of TIDs is
descending, and the range is [first, last). This maps to `undoLog`.
:nodes: C -> M
"""
_fmt = PStruct('ask_new_oids',
PNumber('num_oids'),
)
:nodes: C -> S
""")
_answer = PStruct('answer_new_oids',
PFOidList,
)
AskTransactionInformation, AnswerTransactionInformation = request("""
Ask for transaction metadata.
class Deadlock(Packet):
"""
Ask master to generate a new TTID that will be used by the client to solve
a deadlock by rebasing the transaction on top of concurrent changes.
:nodes: C -> S
""")
:nodes: S -> M -> C
"""
_fmt = PStruct('notify_deadlock',
PTID('ttid'),
PTID('locking_tid'),
)
AskObjectHistory, AnswerObjectHistory = request("""
Ask history information for a given object. The order of serials is
descending, and the range is [first, last]. This maps to `history`.
class RebaseTransaction(Packet):
"""
Rebase a transaction to solve a deadlock.
:nodes: C -> S
"""
_fmt = PStruct('ask_rebase_transaction',
PTID('ttid'),
PTID('locking_tid'),
)
_answer = PStruct('answer_rebase_transaction',
PFOidList,
)
class RebaseObject(Packet):
"""
Rebase an object change to solve a deadlock.
:nodes: C -> S
XXX: It is a request packet to simplify the implementation. For more
efficiency, this should be turned into a notification, and the
RebaseTransaction should be answered once all objects are rebased
(so that the client can still wait on something).
"""
_fmt = PStruct('ask_rebase_object',
PTID('ttid'),
PTID('oid'),
)
_answer = PStruct('answer_rebase_object',
POption('conflict',
PTID('serial'),
PTID('conflict_serial'),
POption('data',
PBoolean('compression'),
PChecksum('checksum'),
PString('data'),
),
)
)
class StoreObject(Packet):
"""
Ask to create/modify an object. This maps to `store`.
As for IStorage, 'serial' is ZERO_TID for new objects.
:nodes: C -> S
"""
_fmt = PStruct('ask_store_object',
POID('oid'),
PTID('serial'),
PBoolean('compression'),
PChecksum('checksum'),
PString('data'),
PTID('data_serial'),
PTID('tid'),
)
_answer = PStruct('answer_store_object',
PTID('conflict'),
)
class AbortTransaction(Packet):
"""
Abort a transaction. This maps to `tpc_abort`.
:nodes: C -> S; C -> M -> S
"""
_fmt = PStruct('abort_transaction',
PTID('tid'),
PFUUIDList, # unused for * -> S
)
class StoreTransaction(Packet):
"""
Ask to store a transaction. Implies vote.
:nodes: C -> S
"""
_fmt = PStruct('ask_store_transaction',
PTID('tid'),
PString('user'),
PString('description'),
PString('extension'),
PFOidList,
)
_answer = PFEmpty
class VoteTransaction(Packet):
"""
Ask to vote a transaction.
:nodes: C -> S
"""
_fmt = PStruct('ask_vote_transaction',
PTID('tid'),
)
_answer = PFEmpty
class GetObject(Packet):
"""
Ask a stored object by its OID, optionally at/before a specific tid.
This maps to `load/loadBefore/loadSerial`.
:nodes: C -> S
""")
:nodes: C -> S
"""
_fmt = PStruct('ask_object',
POID('oid'),
PTID('at'),
PTID('before'),
)
_answer = PStruct('answer_object',
POID('oid'),
PTID('serial_start'),
PTID('serial_end'),
PBoolean('compression'),
PChecksum('checksum'),
PString('data'),
PTID('data_serial'),
)
class TIDList(Packet):
"""
Ask for TIDs between a range of offsets. The order of TIDs is descending,
and the range is [first, last). This maps to `undoLog`.
:nodes: C -> S
"""
_fmt = PStruct('ask_tids',
PIndex('first'),
PIndex('last'),
PNumber('partition'),
)
AskPartitionList, AnswerPartitionList = request("""
Ask information about partitions.
_answer = PStruct('answer_tids',
PFTidList,
)
:nodes: ctl -> A
""")
class TIDListFrom(Packet):
"""
Ask for length TIDs starting at min_tid. The order of TIDs is ascending.
Used by `iterator`.
:nodes: C -> S
"""
_fmt = PStruct('tid_list_from',
PTID('min_tid'),
PTID('max_tid'),
PNumber('length'),
PNumber('partition'),
)
_answer = PStruct('answer_tids',
PFTidList,
)
class TransactionInformation(Packet):
"""
Ask for transaction metadata.
:nodes: C -> S
"""
_fmt = PStruct('ask_transaction_information',
PTID('tid'),
)
_answer = PStruct('answer_transaction_information',
PTID('tid'),
PString('user'),
PString('description'),
PString('extension'),
PBoolean('packed'),
PFOidList,
)
class ObjectHistory(Packet):
"""
Ask history information for a given object. The order of serials is
descending, and the range is [first, last]. This maps to `history`.
:nodes: C -> S
"""
_fmt = PStruct('ask_object_history',
POID('oid'),
PIndex('first'),
PIndex('last'),
)
_answer = PStruct('answer_object_history',
POID('oid'),
PFHistoryList,
)
class PartitionList(Packet):
"""
Ask information about partitions.
AskNodeList, AnswerNodeList = request("""
Ask information about nodes.
:nodes: ctl -> A
"""
_fmt = PStruct('ask_partition_list',
PNumber('min_offset'),
PNumber('max_offset'),
PUUID('uuid'),
)
_answer = PStruct('answer_partition_list',
PPTID('ptid'),
PFRowList,
)
class NodeList(Packet):
"""
Ask information about nodes.
:nodes: ctl -> A
""")
:nodes: ctl -> A
"""
_fmt = PStruct('ask_node_list',
PFNodeType,
)
SetNodeState = request("""
Change the state of a node.
_answer = PStruct('answer_node_list',
PFNodeList,
)
:nodes: ctl -> A -> M
""", error=True, ignore_when_closed=False)
class SetNodeState(Packet):
"""
Change the state of a node.
AddPendingNodes = request("""
Mark given pending nodes as running, for future inclusion when tweaking
the partition table.
:nodes: ctl -> A -> M
"""
_fmt = PStruct('set_node_state',
PUUID('uuid'),
PFNodeState,
)
:nodes: ctl -> A -> M
""", error=True, ignore_when_closed=False)
_answer = Error
TweakPartitionTable = request("""
Ask the master to balance the partition table, optionally excluding
specific nodes in anticipation of removing them.
class AddPendingNodes(Packet):
"""
Mark given pending nodes as running, for future inclusion when tweaking
the partition table.
:nodes: ctl -> A -> M
""", error=True, ignore_when_closed=False)
:nodes: ctl -> A -> M
"""
_fmt = PStruct('add_pending_nodes',
PFUUIDList,
)
SetClusterState = request("""
Set the cluster state.
_answer = Error
:nodes: ctl -> A -> M
""", error=True, ignore_when_closed=False)
class TweakPartitionTable(Packet):
"""
Ask the master to balance the partition table, optionally excluding
specific nodes in anticipation of removing them.
Repair = request("""
Ask storage nodes to repair their databases.
:nodes: ctl -> A -> M
"""
_fmt = PStruct('tweak_partition_table',
PFUUIDList,
)
:nodes: ctl -> A -> M
""", error=True)
_answer = Error
NotifyRepair = notify("""
Repair is translated to this message, asking a specific storage node to
repair its database.
class NotifyNodeInformation(Packet):
"""
Notify information about one or more nodes.
:nodes: M -> S
""")
:nodes: M -> *
"""
_fmt = PStruct('notify_node_informations',
PFloat('id_timestamp'),
PFNodeList,
)
NotifyClusterInformation = notify("""
Notify about a cluster state change.
class SetClusterState(Packet):
"""
Set the cluster state.
:nodes: M -> *
""")
:nodes: ctl -> A -> M
"""
_fmt = PStruct('set_cluster_state',
PEnum('state', ClusterStates),
)
AskClusterState, AnswerClusterState = request("""
Ask the state of the cluster
_answer = Error
:nodes: ctl -> A; A -> M
""")
class Repair(Packet):
"""
Ask storage nodes to repair their databases.
AskObjectUndoSerial, AnswerObjectUndoSerial = request("""
Ask storage the serial where object data is when undoing given
transaction, for a list of OIDs.
:nodes: ctl -> A -> M
"""
_flags = map(PBoolean, ('dry_run',
# 'prune_orphan' (commented because it's the only option for the moment)
))
_fmt = PStruct('repair',
PFUUIDList,
*_flags)
Answer a dict mapping oids to 3-tuples:
current_serial (TID)
The latest serial visible to the undoing transaction.
undo_serial (TID)
Where undone data is (tid at which data is before given undo).
is_current (bool)
If current_serial's data is current on storage.
_answer = Error
:nodes: C -> S
""", allow_dict=True)
class RepairOne(Packet):
"""
Repair is translated to this message, asking a specific storage node to
repair its database.
AskTIDsFrom, AnswerTIDsFrom = request("""
Ask for length TIDs starting at min_tid. The order of TIDs is ascending.
Used by `iterator`.
:nodes: M -> S
"""
_fmt = PStruct('repair', *Repair._flags)
:nodes: C -> S
""")
class ClusterInformation(Packet):
"""
Notify about a cluster state change.
AskPack, AnswerPack = request("""
Request a pack at given TID.
:nodes: M -> *
"""
_fmt = PStruct('notify_cluster_information',
PEnum('state', ClusterStates),
)
:nodes: C -> M -> S
""", ignore_when_closed=False)
class ClusterState(Packet):
"""
Ask the state of the cluster
CheckReplicas = request("""
Ask the cluster to search for mismatches between replicas, metadata
only, and optionally within a specific range. Reference nodes can be
specified.
:nodes: ctl -> A; A -> M
"""
:nodes: ctl -> A -> M
""", error=True, allow_dict=True)
_answer = PStruct('answer_cluster_state',
PEnum('state', ClusterStates),
)
CheckPartition = notify("""
Ask a storage node to compare a partition with all other nodes.
Like for CheckReplicas, only metadata are checked, optionally within a
specific range. A reference node can be specified.
class ObjectUndoSerial(Packet):
"""
Ask storage the serial where object data is when undoing given transaction,
for a list of OIDs.
:nodes: M -> S
""")
object_tid_dict has the following format:
key: oid
value: 3-tuple
current_serial (TID)
The latest serial visible to the undoing transaction.
undo_serial (TID)
Where undone data is (tid at which data is before given undo).
is_current (bool)
If current_serial's data is current on storage.
AskCheckTIDRange, AnswerCheckTIDRange = request("""
Ask some stats about a range of transactions.
Used to know if there are differences between a replicating node and
reference node.
:nodes: C -> S
"""
_fmt = PStruct('ask_undo_transaction',
PTID('tid'),
PTID('ltid'),
PTID('undone_tid'),
PFOidList,
)
_answer = PStruct('answer_undo_transaction',
PDict('object_tid_dict',
POID('oid'),
PStruct('object_tid_value',
PTID('current_serial'),
PTID('undo_serial'),
PBoolean('is_current'),
),
),
)
class CheckCurrentSerial(Packet):
"""
Check if given serial is current for the given oid, and lock it so that
this state is not altered until transaction ends.
This maps to `checkCurrentSerialInTransaction`.
:nodes: S -> S
""")
:nodes: C -> S
"""
_fmt = PStruct('ask_check_current_serial',
PTID('tid'),
POID('oid'),
PTID('serial'),
)
AskCheckSerialRange, AnswerCheckSerialRange = request("""
Ask some stats about a range of object history.
Used to know if there are differences between a replicating node and
reference node.
_answer = StoreObject._answer
:nodes: S -> S
""")
class Pack(Packet):
"""
Request a pack at given TID.
NotifyPartitionCorrupted = notify("""
Notify that mismatches were found while check replicas for a partition.
:nodes: C -> M -> S
"""
_fmt = PStruct('ask_pack',
PTID('tid'),
)
:nodes: S -> M
""")
_answer = PStruct('answer_pack',
PBoolean('status'),
)
NotifyReady = notify("""
Notify that we're ready to serve requests.
class CheckReplicas(Packet):
"""
Ask the cluster to search for mismatches between replicas, metadata only,
and optionally within a specific range. Reference nodes can be specified.
:nodes: S -> M
""")
:nodes: ctl -> A -> M
"""
_fmt = PStruct('check_replicas',
PDict('partition_dict',
PNumber('partition'),
PUUID('source'),
),
PTID('min_tid'),
PTID('max_tid'),
)
_answer = Error
class CheckPartition(Packet):
"""
Ask a storage node to compare a partition with all other nodes.
Like for CheckReplicas, only metadata are checked, optionally within a
specific range. A reference node can be specified.
AskLastTransaction, AnswerLastTransaction = request("""
Ask last committed TID.
:nodes: M -> S
"""
_fmt = PStruct('check_partition',
PNumber('partition'),
PStruct('source',
PString('upstream_name'),
PAddress('address'),
),
PTID('min_tid'),
PTID('max_tid'),
)
class CheckTIDRange(Packet):
"""
Ask some stats about a range of transactions.
Used to know if there are differences between a replicating node and
reference node.
:nodes: C -> M; ctl -> A -> M
""", poll_thread=True)
:nodes: S -> S
"""
_fmt = PStruct('ask_check_tid_range',
PNumber('partition'),
PNumber('length'),
PTID('min_tid'),
PTID('max_tid'),
)
_answer = PStruct('answer_check_tid_range',
PNumber('count'),
PChecksum('checksum'),
PTID('max_tid'),
)
class CheckSerialRange(Packet):
"""
Ask some stats about a range of object history.
Used to know if there are differences between a replicating node and
reference node.
AskCheckCurrentSerial, AnswerCheckCurrentSerial = request("""
Check if given serial is current for the given oid, and lock it so that
this state is not altered until transaction ends.
This maps to `checkCurrentSerialInTransaction`.
:nodes: S -> S
"""
_fmt = PStruct('ask_check_serial_range',
PNumber('partition'),
PNumber('length'),
PTID('min_tid'),
PTID('max_tid'),
POID('min_oid'),
)
_answer = PStruct('answer_check_serial_range',
PNumber('count'),
PChecksum('tid_checksum'),
PTID('max_tid'),
PChecksum('oid_checksum'),
POID('max_oid'),
)
class PartitionCorrupted(Packet):
"""
Notify that mismatches were found while check replicas for a partition.
:nodes: C -> S
""")
:nodes: S -> M
"""
_fmt = PStruct('partition_corrupted',
PNumber('partition'),
PList('cell_list',
PUUID('uuid'),
),
)
class LastTransaction(Packet):
"""
Ask last committed TID.
NotifyTransactionFinished = notify("""
Notify that a transaction blocking a replication is now finished.
:nodes: C -> M; ctl -> A -> M
"""
poll_thread = True
:nodes: M -> S
""")
_answer = PStruct('answer_last_transaction',
PTID('tid'),
)
Replicate = notify("""
Notify a storage node to replicate partitions up to given 'tid'
and from given sources.
class NotifyReady(Packet):
"""
Notify that we're ready to serve requests.
args: tid, upstream_name, {partition: address}
- upstream_name: replicate from an upstream cluster
- address: address of the source storage node, or None if there's
no new data up to 'tid' for the given partition
:nodes: S -> M
"""
:nodes: M -> S
""", allow_dict=True)
class FetchTransactions(Packet):
"""
Ask a storage node to send all transaction data we don't have,
and reply with the list of transactions we should not have.
NotifyReplicationDone = notify("""
Notify the master node that a partition has been successfully
replicated from a storage to another.
:nodes: S -> S
"""
_fmt = PStruct('ask_transaction_list',
PNumber('partition'),
PNumber('length'),
PTID('min_tid'),
PTID('max_tid'),
PFTidList, # already known transactions
)
_answer = PStruct('answer_transaction_list',
PTID('pack_tid'),
PTID('next_tid'),
PFTidList, # transactions to delete
)
class AddTransaction(Packet):
"""
Send metadata of a transaction to a node that does not have them.
:nodes: S -> M
""")
:nodes: S -> S
"""
nodelay = False
_fmt = PStruct('add_transaction',
PTID('tid'),
PString('user'),
PString('description'),
PString('extension'),
PBoolean('packed'),
PTID('ttid'),
PFOidList,
)
class FetchObjects(Packet):
"""
Ask a storage node to send object records we don't have,
and reply with the list of records we should not have.
AskFetchTransactions, AnswerFetchTransactions = request("""
Ask a storage node to send all transaction data we don't have,
and reply with the list of transactions we should not have.
:nodes: S -> S
"""
_fmt = PStruct('ask_object_list',
PNumber('partition'),
PNumber('length'),
PTID('min_tid'),
PTID('max_tid'),
POID('min_oid'),
PDict('object_dict', # already known objects
PTID('serial'),
PFOidList,
),
)
_answer = PStruct('answer_object_list',
PTID('pack_tid'),
PTID('next_tid'),
POID('next_oid'),
PDict('object_dict', # objects to delete
PTID('serial'),
PFOidList,
),
)
class AddObject(Packet):
"""
Send an object record to a node that does not have it.
:nodes: S -> S
""")
:nodes: S -> S
"""
nodelay = False
_fmt = PStruct('add_object',
POID('oid'),
PTID('serial'),
PBoolean('compression'),
PChecksum('checksum'),
PString('data'),
PTID('data_serial'),
)
class Replicate(Packet):
"""
Notify a storage node to replicate partitions up to given 'tid'
and from given sources.
AskFetchObjects, AnswerFetchObjects = request("""
Ask a storage node to send object records we don't have,
and reply with the list of records we should not have.
- upstream_name: replicate from an upstream cluster
- address: address of the source storage node, or None if there's no new
data up to 'tid' for the given partition
:nodes: S -> S
""", allow_dict=True)
:nodes: M -> S
"""
_fmt = PStruct('replicate',
PTID('tid'),
PString('upstream_name'),
PDict('source_dict',
PNumber('partition'),
PAddress('address'),
)
)
class ReplicationDone(Packet):
"""
Notify the master node that a partition has been successfully replicated
from a storage to another.
AddTransaction = notify("""
Send metadata of a transaction to a node that does not have them.
:nodes: S -> M
"""
_fmt = PStruct('notify_replication_done',
PNumber('offset'),
PTID('tid'),
)
:nodes: S -> S
""", nodelay=False)
class Truncate(Packet):
"""
Request DB to be truncated. Also used to leave backup mode.
AddObject = notify("""
Send an object record to a node that does not have it.
:nodes: ctl -> A -> M; M -> S
"""
_fmt = PStruct('truncate',
PTID('tid'),
)
:nodes: S -> S
""", nodelay=False, data_path=(0, 2))
_answer = Error
Truncate = request("""
Request DB to be truncated. Also used to leave backup mode.
class FlushLog(Packet):
"""
Request all nodes to flush their logs.
:nodes: ctl -> A -> M; M -> S
""", error=True)
:nodes: ctl -> A -> M -> *
"""
FlushLog = notify("""
Request all nodes to flush their logs.
:nodes: ctl -> A -> M -> *
""")
_next_code = 0
def register(request, ignore_when_closed=None):
""" Register a packet in the packet registry """
global _next_code
code = _next_code
assert code < RESPONSE_MASK
_next_code = code + 1
if request is Error:
code |= RESPONSE_MASK
# register the request
request._code = code
answer = request._answer
if ignore_when_closed is None:
# By default, on a closed connection:
# - request: ignore
# - answer: keep
# - notification: keep
ignore_when_closed = answer is not None
request._ignore_when_closed = ignore_when_closed
if answer in (Error, None):
return request
# build a class for the answer
answer = type('Answer' + request.__name__, (Packet, ), {})
answer._fmt = request._answer
answer.poll_thread = request.poll_thread
answer._request = request
assert answer._code is None, "Answer of %s is already used" % (request, )
answer._code = code | RESPONSE_MASK
request._answer = answer
return request, answer
del notify, request
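For orientation, the code allocation done by register() above can be reduced to a small standalone sketch; only the request/answer mirroring is taken from the code, and the RESPONSE_MASK value is inferred from the `_code >> 15` test in the neolog changes further down:

RESPONSE_MASK = 0x8000
_next_code = 0

def alloc_codes(has_answer):
    # Requests take the next sequential code; an answer mirrors its
    # request's code with the response bit set, as register() does.
    global _next_code
    code = _next_code
    assert code < RESPONSE_MASK
    _next_code = code + 1
    return code, (code | RESPONSE_MASK) if has_answer else None

ask, answer = alloc_codes(True)       # a request/answer pair
notification, _ = alloc_codes(False)  # notifications get no answer code
assert answer == ask | RESPONSE_MASK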
class Packets(dict):
"""
Packet registry that checks packet code uniqueness and provides an index
"""
def __metaclass__(name, base, d):
# this builds a "singleton"
cls = type('PacketRegistry', base, d)()
for k, v in d.iteritems():
if isinstance(v, type) and issubclass(v, Packet):
v.handler_method_name = k[0].lower() + k[1:]
cls[v._code] = v
return cls
Error = register(
Error)
RequestIdentification, AcceptIdentification = register(
RequestIdentification, ignore_when_closed=True)
Ping, Pong = register(
Ping)
CloseClient = register(
CloseClient)
AskPrimary, AnswerPrimary = register(
PrimaryMaster)
NotPrimaryMaster = register(
NotPrimaryMaster)
NotifyNodeInformation = register(
NotifyNodeInformation)
AskRecovery, AnswerRecovery = register(
Recovery)
AskLastIDs, AnswerLastIDs = register(
LastIDs)
AskPartitionTable, AnswerPartitionTable = register(
PartitionTable)
SendPartitionTable = register(
NotifyPartitionTable)
NotifyPartitionChanges = register(
PartitionChanges)
StartOperation = register(
StartOperation)
StopOperation = register(
StopOperation)
AskUnfinishedTransactions, AnswerUnfinishedTransactions = register(
UnfinishedTransactions)
AskLockedTransactions, AnswerLockedTransactions = register(
LockedTransactions)
AskFinalTID, AnswerFinalTID = register(
FinalTID)
ValidateTransaction = register(
ValidateTransaction)
AskBeginTransaction, AnswerBeginTransaction = register(
BeginTransaction)
FailedVote = register(
FailedVote)
AskFinishTransaction, AnswerTransactionFinished = register(
FinishTransaction, ignore_when_closed=False)
AskLockInformation, AnswerInformationLocked = register(
LockInformation, ignore_when_closed=False)
InvalidateObjects = register(
InvalidateObjects)
NotifyUnlockInformation = register(
UnlockInformation)
AskNewOIDs, AnswerNewOIDs = register(
GenerateOIDs)
NotifyDeadlock = register(
Deadlock)
AskRebaseTransaction, AnswerRebaseTransaction = register(
RebaseTransaction)
AskRebaseObject, AnswerRebaseObject = register(
RebaseObject)
AskStoreObject, AnswerStoreObject = register(
StoreObject)
AbortTransaction = register(
AbortTransaction)
AskStoreTransaction, AnswerStoreTransaction = register(
StoreTransaction)
AskVoteTransaction, AnswerVoteTransaction = register(
VoteTransaction)
AskObject, AnswerObject = register(
GetObject)
AskTIDs, AnswerTIDs = register(
TIDList)
AskTransactionInformation, AnswerTransactionInformation = register(
TransactionInformation)
AskObjectHistory, AnswerObjectHistory = register(
ObjectHistory)
AskPartitionList, AnswerPartitionList = register(
PartitionList)
AskNodeList, AnswerNodeList = register(
NodeList)
SetNodeState = register(
SetNodeState, ignore_when_closed=False)
AddPendingNodes = register(
AddPendingNodes, ignore_when_closed=False)
TweakPartitionTable = register(
TweakPartitionTable, ignore_when_closed=False)
SetClusterState = register(
SetClusterState, ignore_when_closed=False)
Repair = register(
Repair)
NotifyRepair = register(
RepairOne)
NotifyClusterInformation = register(
ClusterInformation)
AskClusterState, AnswerClusterState = register(
ClusterState)
AskObjectUndoSerial, AnswerObjectUndoSerial = register(
ObjectUndoSerial)
AskTIDsFrom, AnswerTIDsFrom = register(
TIDListFrom)
AskPack, AnswerPack = register(
Pack, ignore_when_closed=False)
CheckReplicas = register(
CheckReplicas)
CheckPartition = register(
CheckPartition)
AskCheckTIDRange, AnswerCheckTIDRange = register(
CheckTIDRange)
AskCheckSerialRange, AnswerCheckSerialRange = register(
CheckSerialRange)
NotifyPartitionCorrupted = register(
PartitionCorrupted)
NotifyReady = register(
NotifyReady)
AskLastTransaction, AnswerLastTransaction = register(
LastTransaction)
AskCheckCurrentSerial, AnswerCheckCurrentSerial = register(
CheckCurrentSerial)
NotifyTransactionFinished = register(
NotifyTransactionFinished)
Replicate = register(
Replicate)
NotifyReplicationDone = register(
ReplicationDone)
AskFetchTransactions, AnswerFetchTransactions = register(
FetchTransactions)
AskFetchObjects, AnswerFetchObjects = register(
FetchObjects)
AddTransaction = register(
AddTransaction)
AddObject = register(
AddObject)
Truncate = register(
Truncate)
FlushLog = register(
FlushLog)
def Errors():
registry_dict = {}
handler_method_name_dict = {}
Error = Packets.Error
def register_error(code):
return lambda self, message='': Error(code, message)
for error in ErrorCodes:
......@@ -1836,19 +841,20 @@ from operator import itemgetter
def formatNodeList(node_list, prefix='', _sort_key=itemgetter(2)):
if node_list:
node_list.sort(key=_sort_key)
node_list = [(
uuid_str(uuid), str(node_type),
('[%s]:%s' if ':' in addr[0] else '%s:%s')
% addr if addr else '', str(state),
str(id_timestamp and datetime.utcfromtimestamp(id_timestamp)))
for node_type, addr, uuid, state, id_timestamp in node_list]
for node_type, addr, uuid, state, id_timestamp
in sorted(node_list, key=_sort_key)]
t = ''.join('%%-%us | ' % max(len(x[i]) for x in node_list)
for i in xrange(len(node_list[0]) - 1))
return map((prefix + t + '%s').__mod__, node_list)
return ()
NotifyNodeInformation._neolog = staticmethod(lambda timestamp, node_list:
Packets.NotifyNodeInformation._neolog = staticmethod(
lambda timestamp, node_list:
((timestamp,), formatNodeList(node_list, ' ! ')))
Error._neolog = staticmethod(lambda *args: ((), ("%s (%s)" % args,)))
Packets.Error._neolog = staticmethod(lambda *args: ((), ("%s (%s)" % args,)))
......@@ -166,65 +166,6 @@ def parseMasterList(masters):
return map(parseNodeAddress, masters.split())
class ReadBuffer(object):
"""
Implementation of a lazy buffer. Its main purpose is to reduce useless
copies of data by storing chunks and joining them only when the requested
size is available.
TODO: For better performance, use:
- socket.recv_into (64kiB blocks)
- struct.unpack_from
- and a circular buffer of dynamic size (initial size:
twice the length passed to socket.recv_into ?)
"""
def __init__(self):
self.size = 0
self.content = deque()
def append(self, data):
""" Append some data and compute the new buffer size """
self.size += len(data)
self.content.append(data)
def __len__(self):
""" Return the current buffer size """
return self.size
def read(self, size):
""" Read and consume size bytes """
if self.size < size:
return None
self.size -= size
chunk_list = []
pop_chunk = self.content.popleft
append_data = chunk_list.append
to_read = size
# select required chunks
while to_read > 0:
chunk_data = pop_chunk()
to_read -= len(chunk_data)
append_data(chunk_data)
if to_read < 0:
# too many bytes consumed, cut the last chunk
last_chunk = chunk_list[-1]
keep, let = last_chunk[:to_read], last_chunk[to_read:]
self.content.appendleft(let)
chunk_list[-1] = keep
# join all chunks (one copy)
data = ''.join(chunk_list)
assert len(data) == size
return data
def clear(self):
""" Erase all buffer content """
self.size = 0
self.content.clear()
dummy_read_buffer = ReadBuffer()
dummy_read_buffer.append = lambda _: None
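Dropping ReadBuffer fits the msgpack switch: msgpack's Unpacker buffers internally, so chunks received from a socket can be fed to it as-is. A minimal sketch of that streaming pattern, assuming msgpack>=0.5.6 (the version required below in setup.py):

import msgpack

u = msgpack.Unpacker(use_list=False)  # arrays decode as tuples
u.feed(msgpack.packb((1, 2)))         # feed whole or partial recv() chunks
u.feed(msgpack.packb((3, 4)))
assert list(u) == [(1, 2), (3, 4)]    # objects come out as they complete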
class cached_property(object):
"""
A property that is only computed once per instance and then replaces itself
......
......@@ -578,7 +578,9 @@ class Application(BaseApplication):
self.tm.executeQueuedEvents()
def startStorage(self, node):
node.send(Packets.StartOperation(self.backup_tid))
# XXX: Is this boolean 'backup' field needed ?
# Maybe this can be deduced from cluster state.
node.send(Packets.StartOperation(bool(self.backup_tid)))
uuid = node.getUUID()
assert uuid not in self.storage_starting_set
if uuid not in self.storage_ready_dict:
......
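The new bool() cast is presumably needed because msgpack serializes the value as given: the old PBoolean field coerced any truthy value at encoding time, whereas msgpack would now send the whole TID string. A quick illustration:

import msgpack

backup_tid = '\x00' * 8               # a TID, or None outside backup mode
len(msgpack.packb(bool(backup_tid)))  # 1 byte: a real boolean
len(msgpack.packb(backup_tid))        # 9 bytes: the raw TID string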
......@@ -157,27 +157,49 @@ class Log(object):
for x in 'uuid_str', 'Packets', 'PacketMalformedError':
setattr(self, x, g[x])
x = {}
try:
Unpacker = g['Unpacker']
except KeyError:
unpackb = None
else:
from msgpack import ExtraData, UnpackException
def unpackb(data):
u = Unpacker()
u.feed(data)
data = u.unpack()
if u.read_bytes(1):
raise ExtraData
return data
self.PacketMalformedError = UnpackException
self.unpackb = unpackb
if self._decode > 1:
PStruct = g['PStruct']
PBoolean = g['PBoolean']
def hasData(item):
items = item._items
for i, item in enumerate(items):
if isinstance(item, PStruct):
j = hasData(item)
if j:
return (i,) + j
elif (isinstance(item, PBoolean)
and item._name == 'compression'
and i + 2 < len(items)
and items[i+2]._name == 'data'):
return i,
for p in self.Packets.itervalues():
if p._fmt is not None:
path = hasData(p._fmt)
if path:
assert not hasattr(p, '_neolog'), p
x[p._code] = path
try:
PStruct = g['PStruct']
except KeyError:
for p in self.Packets.itervalues():
data_path = getattr(p, 'data_path', (None,))
if p._code >> 15 == data_path[0]:
x[p._code] = data_path[1:]
else:
PBoolean = g['PBoolean']
def hasData(item):
items = item._items
for i, item in enumerate(items):
if isinstance(item, PStruct):
j = hasData(item)
if j:
return (i,) + j
elif (isinstance(item, PBoolean)
and item._name == 'compression'
and i + 2 < len(items)
and items[i+2]._name == 'data'):
return i,
for p in self.Packets.itervalues():
if p._fmt is not None:
path = hasData(p._fmt)
if path:
assert not hasattr(p, '_neolog'), p
x[p._code] = path
self._getDataPath = x.get
try:
......@@ -215,11 +237,13 @@ class Log(object):
if body is not None:
log = getattr(p, '_neolog', None)
if log or self._decode:
p = p()
p._id = msg_id
p._body = body
try:
args = p.decode()
if self.unpackb:
args = self.unpackb(body)
else:
p = p()
p._body = body
args = p.decode()
except self.PacketMalformedError:
msg.append("Can't decode packet")
else:
......
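The unpackb helper added above decodes exactly one object and treats trailing bytes as corruption; standalone, the same pattern looks like this (ExtraData's two-argument constructor comes from msgpack's public exceptions):

import msgpack

def unpackb_strict(data):
    u = msgpack.Unpacker()
    u.feed(data)
    obj = u.unpack()
    extra = u.read_bytes(1)
    if extra:  # leftover bytes: not a single, clean packet
        raise msgpack.ExtraData(obj, extra)
    return obj

assert unpackb_strict(msgpack.packb([1, 2])) == [1, 2]
# unpackb_strict(msgpack.packb([1, 2]) + 'x') would raise ExtraData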
......@@ -451,8 +451,12 @@ class SQLiteDatabaseManager(DatabaseManager):
return r
def loadData(self, data_id):
return self.query("SELECT compression, hash, value"
" FROM data WHERE id=?", (data_id,)).fetchone()
compression, checksum, data = self.query(
"SELECT compression, hash, value FROM data WHERE id=?",
(data_id,)).fetchone()
if checksum:
return compression, str(checksum), str(data)
return compression, checksum, data
def _getDataTID(self, oid, tid=None, before_tid=None):
partition = self._getReadablePartition(oid)
......
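A sketch of why the str() conversion in loadData() matters, assuming Python 2's sqlite3, which returns buffer objects for BLOB columns; buffers are not serializable by msgpack, unlike plain byte strings:

import sqlite3

con = sqlite3.connect(':memory:')
con.execute('CREATE TABLE data (hash BLOB, value BLOB)')
con.execute('INSERT INTO data VALUES (?, ?)',
    (sqlite3.Binary('\0' * 20), sqlite3.Binary('payload')))
checksum, data = con.execute('SELECT hash, value FROM data').fetchone()
# On Python 2, checksum and data are buffer objects here;
# str() turns them back into plain byte strings.
assert str(data) == 'payload'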
......@@ -248,7 +248,7 @@ class StorageOperationHandler(EventHandler):
for serial, oid in object_list:
oid_set = object_dict.get(serial)
if oid_set:
if type(oid_set) is list:
if type(oid_set) is tuple:
object_dict[serial] = oid_set = set(oid_set)
if oid in oid_set:
oid_set.remove(oid)
......
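The `is list` to `is tuple` change suggests the protocol's Unpacker is created with use_list=False, so msgpack arrays arrive as tuples:

import msgpack

packed = msgpack.packb([1, 2, 3])
assert msgpack.unpackb(packed) == [1, 2, 3]
assert msgpack.unpackb(packed, use_list=False) == (1, 2, 3)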
......@@ -73,7 +73,7 @@ class MasterClientHandlerTests(NeoUnitTestBase):
self.app.nm.getByUUID(storage_uuid).setConnection(storage_conn)
self.service.askPack(conn, tid)
self.checkNoPacketSent(conn)
ptid = self.checkAskPacket(storage_conn, Packets.AskPack).decode()[0]
ptid = self.checkAskPacket(storage_conn, Packets.AskPack)._args[0]
self.assertEqual(ptid, tid)
self.assertTrue(self.app.packing[0] is conn)
self.assertEqual(self.app.packing[1], peer_id)
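These test updates track the packet API change: fields are no longer decoded on demand from a struct format but kept as the already-decoded argument tuple. A toy model (only the _args attribute is taken from the diff; the class itself is hypothetical):

class Packet(object):
    # Arguments are stored decoded at construction time, so tests read
    # packet._args[0] where they previously called packet.decode()[0].
    def __init__(self, *args):
        self._args = args

assert Packet('\x00' * 8)._args[0] == '\x00' * 8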
......@@ -85,7 +85,7 @@ class MasterClientHandlerTests(NeoUnitTestBase):
self.app.nm.getByUUID(storage_uuid).setConnection(storage_conn)
self.service.askPack(conn, tid)
self.checkNoPacketSent(storage_conn)
status = self.checkAnswerPacket(conn, Packets.AnswerPack).decode()[0]
status = self.checkAnswerPacket(conn, Packets.AnswerPack)._args[0]
self.assertFalse(status)
if __name__ == '__main__':
......
......@@ -73,7 +73,7 @@ class MasterStorageHandlerTests(NeoUnitTestBase):
self.service.answerPack(conn2, False)
packet = self.checkNotifyPacket(client_conn, Packets.AnswerPack)
# TODO: verify packet peer id
self.assertTrue(packet.decode()[0])
self.assertTrue(packet._args[0])
self.assertEqual(self.app.packing, None)
if __name__ == '__main__':
......
......@@ -33,9 +33,9 @@ class HandlerTests(NeoUnitTestBase):
def getFakePacket(self):
p = Mock({
'decode': (),
'__repr__': 'Fake Packet',
})
p._args = ()
p.handler_method_name = 'fake_method'
return p
......@@ -53,13 +53,6 @@ class HandlerTests(NeoUnitTestBase):
self.handler.dispatch(conn, packet)
self.checkErrorPacket(conn)
self.checkAborted(conn)
# raise PacketMalformedError
conn.mockCalledMethods = {}
def fake(c):
raise PacketMalformedError('message')
self.setFakeMethod(fake)
self.handler.dispatch(conn, packet)
self.checkClosed(conn)
# raise NotReadyError
conn.mockCalledMethods = {}
def fake(c):
......
......@@ -17,7 +17,7 @@
import unittest
import socket
from . import NeoUnitTestBase
from neo.lib.util import ReadBuffer, parseNodeAddress
from neo.lib.util import parseNodeAddress
class UtilTests(NeoUnitTestBase):
......@@ -40,24 +40,6 @@ class UtilTests(NeoUnitTestBase):
self.assertIn(parseNodeAddress('localhost'), local_address(0))
self.assertIn(parseNodeAddress('localhost:10'), local_address(10))
def testReadBufferRead(self):
""" Append some chunk then consume the data """
buf = ReadBuffer()
self.assertEqual(len(buf), 0)
buf.append('abc')
self.assertEqual(len(buf), 3)
# not enough data
self.assertEqual(buf.read(4), None)
self.assertEqual(len(buf), 3)
buf.append('def')
# consume a part
self.assertEqual(len(buf), 6)
self.assertEqual(buf.read(4), 'abcd')
self.assertEqual(len(buf), 2)
# consume the rest
self.assertEqual(buf.read(3), None)
self.assertEqual(buf.read(2), 'ef')
if __name__ == "__main__":
unittest.main()
......@@ -1134,7 +1134,7 @@ class Test(NEOThreadedTest):
# Also check that the master reset the last oid to a correct value.
t.begin()
self.assertEqual(1, u64(c.root()['x']._p_oid))
self.assertFalse(cluster.client.new_oid_list)
self.assertFalse(cluster.client.new_oids)
self.assertEqual(2, u64(cluster.client.new_oid()))
@with_cluster()
......@@ -1911,7 +1911,7 @@ class Test(NEOThreadedTest):
except threading.ThreadError:
l[j].acquire()
threads[j-1].start()
if x != 'StoreTransaction':
if x != 'AskStoreTransaction':
try:
l[i].acquire()
except IndexError:
......@@ -1988,15 +1988,16 @@ class Test(NEOThreadedTest):
x = self._testComplexDeadlockAvoidanceWithOneStorage(changes,
(1, 1, 0, 1, 2, 2, 2, 2, 0, 1, 2, 1, 0, 0, 1, 0, 0, 1),
('tpc_begin', 'tpc_begin', 1, 2, 3, 'tpc_begin', 1, 2, 4, 3, 4,
'StoreTransaction', 'RebaseTransaction', 'RebaseTransaction',
'AnswerRebaseTransaction', 'AnswerRebaseTransaction',
'RebaseTransaction', 'AnswerRebaseTransaction'),
'AskStoreTransaction', 'AskRebaseTransaction',
'AskRebaseTransaction', 'AnswerRebaseTransaction',
'AnswerRebaseTransaction', 'AskRebaseTransaction',
'AnswerRebaseTransaction'),
[4, 6, 2, 6])
try:
x[1].remove(1)
except ValueError:
pass
self.assertEqual(x, {0: [2, 'StoreTransaction'], 1: ['tpc_abort']})
self.assertEqual(x, {0: [2, 'AskStoreTransaction'], 1: ['tpc_abort']})
def testCascadedDeadlockAvoidanceWithOneStorage2(self):
def changes(r1, r2, r3):
......@@ -2019,8 +2020,8 @@ class Test(NEOThreadedTest):
(0, 1, 1, 0, 1, 2, 2, 2, 2, 0, 1, 2, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1),
('tpc_begin', 1, 'tpc_begin', 1, 2, 3, 'tpc_begin',
2, 3, 4, 3, 4, 'StoreTransaction', 'RebaseTransaction',
'RebaseTransaction', 'AnswerRebaseTransaction'),
2, 3, 4, 3, 4, 'AskStoreTransaction', 'AskRebaseTransaction',
'AskRebaseTransaction', 'AnswerRebaseTransaction'),
[1, 7, 9, 0])
x[0].sort(key=str)
try:
......@@ -2029,8 +2030,8 @@ class Test(NEOThreadedTest):
pass
self.assertEqual(x, {
0: [2, 3, 'AnswerRebaseTransaction',
'RebaseTransaction', 'StoreTransaction'],
1: ['AnswerRebaseTransaction','RebaseTransaction',
'AskRebaseTransaction', 'AskStoreTransaction'],
1: ['AnswerRebaseTransaction','AskRebaseTransaction',
'AnswerRebaseTransaction', 'tpc_abort'],
})
......@@ -2063,7 +2064,7 @@ class Test(NEOThreadedTest):
end = self._testComplexDeadlockAvoidanceWithOneStorage(changes,
(0, 1, 1, 0, 1, 1, 0, 0, 2, 2, 2, 2, 1, vote_t2, tic_t1),
('tpc_begin', 1) * 2, [3, 0, 0, 0], None)
self.assertLessEqual(2, end[0].count('RebaseTransaction'))
self.assertLessEqual(2, end[0].count('AskRebaseTransaction'))
def testFailedConflictOnBigValueDuringDeadlockAvoidance(self):
def changes(r1, r2, r3):
......@@ -2079,10 +2080,10 @@ class Test(NEOThreadedTest):
x = self._testComplexDeadlockAvoidanceWithOneStorage(changes,
(1, 1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 1, 1, 1, 0),
('tpc_begin', 'tpc_begin', 1, 2, 'tpc_begin', 1, 3, 3, 4,
'StoreTransaction', 2, 4, 'RebaseTransaction',
'AskStoreTransaction', 2, 4, 'AskRebaseTransaction',
'AnswerRebaseTransaction', 'tpc_abort'),
[5, 1, 0, 2], POSException.ConflictError)
self.assertEqual(x, {0: ['StoreTransaction']})
self.assertEqual(x, {0: ['AskStoreTransaction']})
@with_cluster(replicas=1, partitions=4)
def testNotifyReplicated(self, cluster):
......@@ -2169,7 +2170,7 @@ class Test(NEOThreadedTest):
def delayConflict(conn, packet):
app = self.getConnectionApp(conn)
if (isinstance(packet, Packets.AnswerStoreObject)
and packet.decode()[0]):
and packet._args[0]):
conn, = cluster.client.getConnectionList(app)
kw = conn._handlers._pending[0][0][packet._id][1]
return 1 == u64(kw['oid']) and delay_conflict[app.uuid].pop()
......@@ -2187,8 +2188,9 @@ class Test(NEOThreadedTest):
self.thread_switcher(threads,
(1, 2, 3, 0, 1, 0, 2, t3_c, 1, 3, 2, t3_resolve, 0, 0, 0,
t1_rebase, 2, t3_b, 3, t4_d, 0, 2, 2),
('tpc_begin', 'tpc_begin', 'tpc_begin', 'tpc_begin', 2, 1, 1,
3, 3, 4, 4, 3, 1, 'RebaseTransaction', 'RebaseTransaction',
('tpc_begin', 'tpc_begin', 'tpc_begin', 'tpc_begin',
2, 1, 1, 3, 3, 4, 4, 3, 1,
'AskRebaseTransaction', 'AskRebaseTransaction',
'AnswerRebaseTransaction', 'AnswerRebaseTransaction', 2
)) as end:
delay = f.delayAskFetchTransactions()
......@@ -2200,11 +2202,11 @@ class Test(NEOThreadedTest):
t4.begin()
self.assertEqual([15, 11, 13, 16], [r[x].value for x in 'abcd'])
self.assertEqual([2, 2], map(end.pop(2).count,
['RebaseTransaction', 'AnswerRebaseTransaction']))
['AskRebaseTransaction', 'AnswerRebaseTransaction']))
self.assertEqual(end, {
0: [1, 'StoreTransaction'],
1: ['StoreTransaction'],
3: [4, 'StoreTransaction'],
0: [1, 'AskStoreTransaction'],
1: ['AskStoreTransaction'],
3: [4, 'AskStoreTransaction'],
})
self.assertFalse(s1.dm.getOrphanList())
......@@ -2240,7 +2242,8 @@ class Test(NEOThreadedTest):
self.thread_switcher((thread,),
(1, 0, 1, 1, t2_b, 0, 0, 1, t2_vote, 0, 0),
('tpc_begin', 'tpc_begin', 1, 1, 2, 2,
'RebaseTransaction', 'RebaseTransaction', 'StoreTransaction',
'AskRebaseTransaction', 'AskRebaseTransaction',
'AskStoreTransaction',
'AnswerRebaseTransaction', 'AnswerRebaseTransaction',
)) as end:
delay = f.delayAskFetchTransactions()
......@@ -2299,9 +2302,10 @@ class Test(NEOThreadedTest):
with self.thread_switcher((commit23,),
(1, 1, 0, 0, t1_rebase, 0, 0, 0, 1, 1, 1, 1, 0),
('tpc_begin', 'tpc_begin', 0, 1, 0,
'RebaseTransaction', 'RebaseTransaction',
'AnswerRebaseTransaction', 'AnswerRebaseTransaction',
'StoreTransaction', 'tpc_begin', 1, 'tpc_abort')) as end:
'AskRebaseTransaction', 'AskRebaseTransaction',
'AnswerRebaseTransaction', 'AnswerRebaseTransaction',
'AskStoreTransaction', 'tpc_begin', 1, 'tpc_abort',
)) as end:
self.assertRaises(POSException.ConflictError, t1.commit)
commit23.join()
self.assertEqual(end, {0: ['tpc_abort']})
......@@ -2388,9 +2392,9 @@ class Test(NEOThreadedTest):
self.thread_switcher((commit2,),
(1, 1, 0, 0, t1_b, t1_resolve, 0, 0, 0, 0, 1, t2_vote, t1_end),
('tpc_begin', 'tpc_begin', 2, 1, 2, 1, 1,
'RebaseTransaction', 'RebaseTransaction',
'AskRebaseTransaction', 'AskRebaseTransaction',
'AnswerRebaseTransaction', 'AnswerRebaseTransaction',
'StoreTransaction')) as end:
'AskStoreTransaction')) as end:
t1.commit()
commit2.join()
t1.begin()
......@@ -2398,7 +2402,7 @@ class Test(NEOThreadedTest):
self.assertEqual(r['a'].value, 9)
self.assertEqual(r['b'].value, 6)
t1 = end.pop(0)
self.assertEqual(t1.pop(), 'StoreTransaction')
self.assertEqual(t1.pop(), 'AskStoreTransaction')
self.assertEqual(sorted(t1), [1, 2])
self.assertFalse(end)
self.assertPartitionTable(cluster, 'UU|UU')
......@@ -2500,9 +2504,9 @@ class Test(NEOThreadedTest):
with Patch(cluster.client, _loadFromStorage=load) as p, \
self.thread_switcher((commit2,),
(1, 0, tic1, 0, t1_resolve, 1, t2_begin, 0, 1, 1, 0),
('tpc_begin', 'tpc_begin', 1, 1, 1, 'StoreTransaction',
'tpc_begin', 'RebaseTransaction', 'RebaseTransaction', 1,
'StoreTransaction')) as end:
('tpc_begin', 'tpc_begin', 1, 1, 1, 'AskStoreTransaction',
'tpc_begin', 'AskRebaseTransaction', 'AskRebaseTransaction',
1, 'AskStoreTransaction')) as end:
self.assertRaisesRegexp(NEOStorageError,
'^partition 0 not fully write-locked$',
t1.commit)
......@@ -2555,13 +2559,14 @@ class Test(NEOThreadedTest):
f.remove(delayFinish)
with self.thread_switcher((commit2,),
(1, 0, 0, 1, t2_b, 0, t1_resolve),
('tpc_begin', 'tpc_begin', 0, 2, 2, 'StoreTransaction')) as end:
('tpc_begin', 'tpc_begin', 0, 2, 2, 'AskStoreTransaction')
) as end:
t1.commit()
commit2.join()
t1.begin()
self.assertEqual(c1.root()['b'].value, 6)
self.assertPartitionTable(cluster, 'UU|UU')
self.assertEqual(end, {0: [2, 2, 'StoreTransaction']})
self.assertEqual(end, {0: [2, 2, 'AskStoreTransaction']})
self.assertFalse(s1.dm.getOrphanList())
@with_cluster(storage_count=2, partitions=2)
......@@ -2584,19 +2589,19 @@ class Test(NEOThreadedTest):
yield 1
self.tic()
with self.thread_switcher((t,), (1, 0, 1, 0, t1_b, 0, 0, 0, 1),
('tpc_begin', 'tpc_begin', 1, 3, 3, 1, 'RebaseTransaction',
('tpc_begin', 'tpc_begin', 1, 3, 3, 1, 'AskRebaseTransaction',
2, 'AnswerRebaseTransaction')) as end:
t1.commit()
t.join()
t2.begin()
self.assertEqual([6, 9, 6], [r[x].value for x in 'abc'])
self.assertEqual([2, 2], map(end.pop(1).count,
['RebaseTransaction', 'AnswerRebaseTransaction']))
['AskRebaseTransaction', 'AnswerRebaseTransaction']))
# Rarely, there's an extra deadlock for t1:
# 0: ['AnswerRebaseTransaction', 'RebaseTransaction',
# 'RebaseTransaction', 'AnswerRebaseTransaction',
# 0: ['AnswerRebaseTransaction', 'AskRebaseTransaction',
# 'AskRebaseTransaction', 'AnswerRebaseTransaction',
# 'AnswerRebaseTransaction', 2, 3, 1,
# 'StoreTransaction', 'VoteTransaction']
# 'AskStoreTransaction', 'VoteTransaction']
self.assertEqual(end.pop(0)[0], 'AnswerRebaseTransaction')
self.assertFalse(end)
......@@ -2626,13 +2631,13 @@ class Test(NEOThreadedTest):
threads = map(self.newPausedThread, (t2.commit, t3.commit))
with self.thread_switcher(threads, (1, 2, 0, 1, 2, 1, 0, 2, 0, 1, 2),
('tpc_begin', 'tpc_begin', 'tpc_begin', 1, 2, 3, 4, 4, 4,
'RebaseTransaction', 'StoreTransaction')) as end:
'AskRebaseTransaction', 'AskStoreTransaction')) as end:
t1.commit()
for t in threads:
t.join()
self.assertEqual(end, {
0: ['AnswerRebaseTransaction', 'StoreTransaction'],
2: ['StoreTransaction']})
0: ['AnswerRebaseTransaction', 'AskStoreTransaction'],
2: ['AskStoreTransaction']})
@with_cluster(replicas=1)
def testConflictAfterDeadlockWithSlowReplica1(self, cluster,
......@@ -2675,16 +2680,16 @@ class Test(NEOThreadedTest):
order[-1] = t1_resolve
delay = f.delayAskStoreObject()
with self.thread_switcher((t,), order,
('tpc_begin', 'tpc_begin', 1, 1, 2, 2, 'RebaseTransaction',
'RebaseTransaction', 'AnswerRebaseTransaction',
'StoreTransaction')) as end:
('tpc_begin', 'tpc_begin', 1, 1, 2, 2, 'AskRebaseTransaction',
'AskRebaseTransaction', 'AnswerRebaseTransaction',
'AskStoreTransaction')) as end:
t1.commit()
t.join()
self.assertNotIn(delay, f)
t2.begin()
end[0].sort(key=str)
self.assertEqual(end, {0: [1, 'AnswerRebaseTransaction',
'StoreTransaction']})
'AskStoreTransaction']})
self.assertEqual([4, 2], [r[x].value for x in 'ab'])
def testConflictAfterDeadlockWithSlowReplica2(self):
......@@ -2735,7 +2740,7 @@ class Test(NEOThreadedTest):
with ConnectionFilter() as f:
f.add(lambda conn, packet:
isinstance(packet, Packets.RequestIdentification)
and packet.decode()[0] == NodeTypes.STORAGE)
and packet._args[0] == NodeTypes.STORAGE)
self.tic()
m2.start()
self.tic()
......@@ -2775,7 +2780,7 @@ class Test(NEOThreadedTest):
with ConnectionFilter() as f:
f.add(lambda conn, packet:
isinstance(packet, Packets.RequestIdentification)
and packet.decode()[0] == NodeTypes.MASTER)
and packet._args[0] == NodeTypes.MASTER)
cluster.start(recovering=True)
neoctl = cluster.neoctl
getClusterState = neoctl.getClusterState
......
......@@ -103,7 +103,7 @@ class ReplicationTests(NEOThreadedTest):
importZODB(3)
def delaySecondary(conn, packet):
if isinstance(packet, Packets.Replicate):
tid, upstream_name, source_dict = packet.decode()
tid, upstream_name, source_dict = packet._args
return not upstream_name and all(source_dict.itervalues())
with NEOCluster(partitions=np, replicas=nr-1, storage_count=5,
upstream=upstream) as backup:
......@@ -443,7 +443,7 @@ class ReplicationTests(NEOThreadedTest):
"""
def delayAskFetch(conn, packet):
return isinstance(packet, delayed) and \
packet.decode()[0] == offset and \
packet._args[0] == offset and \
conn in s1.getConnectionList(s0)
def changePartitionTable(orig, ptid, cell_list):
if (offset, s0.uuid, CellStates.DISCARDED) in cell_list:
......@@ -695,7 +695,7 @@ class ReplicationTests(NEOThreadedTest):
def logReplication(conn, packet):
if isinstance(packet, (Packets.AskFetchTransactions,
Packets.AskFetchObjects)):
ask.append(packet.decode()[2:])
ask.append(packet._args[2:])
def getTIDList():
return [t.tid for t in c.db().storage.iterator()]
s0, s1 = cluster.storage_list
......@@ -796,7 +796,7 @@ class ReplicationTests(NEOThreadedTest):
return True
elif not isinstance(packet, Packets.AskFetchTransactions):
return
ask.append(packet.decode())
ask.append(packet._args)
conn, = upstream.master.getConnectionList(backup.master)
with ConnectionFilter() as f, Patch(replicator.Replicator,
_nextPartitionSortKey=lambda orig, self, offset: offset):
......@@ -857,11 +857,11 @@ class ReplicationTests(NEOThreadedTest):
@f.add
def delayReplicate(conn, packet):
if isinstance(packet, Packets.AskFetchTransactions):
trans.append(packet.decode()[2])
trans.append(packet._args[2])
elif isinstance(packet, Packets.AskFetchObjects):
if obj:
return True
obj.append(packet.decode()[2])
obj.append(packet._args[2])
s2.start()
self.tic()
cluster.neoctl.enableStorageList([s2.uuid])
......
......@@ -53,7 +53,7 @@ extras_require = {
'master': [],
'storage-sqlite': [],
'storage-mysqldb': ['mysqlclient'],
'storage-importer': zodb_require + ['msgpack>=0.5.6', 'setproctitle'],
'storage-importer': zodb_require + ['setproctitle'],
}
extras_require['tests'] = ['coverage', 'zope.testing', 'psutil>=2',
'neoppod[%s]' % ', '.join(extras_require)]
......@@ -108,6 +108,7 @@ setup(
],
},
install_requires = [
'msgpack>=0.5.6',
'python-dateutil', # neolog --from
],
extras_require = extras_require,
......