Commit 5e7f34d2 authored by Julien Muchembled

New algorithm for deadlock avoidance

The time complexity of the previous one was too bad: with several tens of
concurrent transactions, we saw commits take minutes to complete and the
whole application looked frozen.

This new algorithm is much simpler. Instead of asking the oldest
transaction to somewhat restart (we used the term "rebase" because the
concept was similar to what git-rebase does), the storage node gives it
priority and asks the newest transaction to relock (the request is ignored
if the vote already happened, which means there was actually no deadlock).
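
To make the rule concrete, here is a minimal sketch (with assumed names,
not the actual NEO code) of what the storage and client sides do under the
new scheme:

    # Storage side: the write-lock stays with the oldest transaction
    # (smallest TTID); the youngest lock owner is asked to relock.
    def on_lock_request(store_locks, oid, ttid, notify_deadlock):
        locked = store_locks.get(oid)
        if locked is None:
            store_locks[oid] = ttid      # lock is free: take it
            return 'locked'
        if locked == ttid:
            return 'locked'              # already ours (second store)
        if locked < ttid:
            return 'delayed'             # we are younger: wait for release
        # We are older than the current owner: possible deadlock. Give
        # priority to the smaller TTID by asking the younger client to
        # relock this oid, and wait until it does.
        notify_deadlock(locked, oid)
        return 'delayed'

    # Client side: the relock request is ignored once the transaction has
    # voted, because in that case there was actually no deadlock.
    def on_notify_deadlock(txn, oid, ask_relock):
        if txn.voted:
            return
        ask_relock(txn.ttid, oid)        # release the lock, then lock again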

testLocklessWriteDuringConflictResolution was initially more complex
because Transaction.written (client) ignored KeyError (which is no longer
the case since commit 8ef1ddba).
parent d98b576c
...@@ -524,59 +524,37 @@ class Application(ThreadedApplication): ...@@ -524,59 +524,37 @@ class Application(ThreadedApplication):
while 1: while 1:
# We iterate over conflict_dict, and clear it, # We iterate over conflict_dict, and clear it,
# because new items may be added by calls to _store. # because new items may be added by calls to _store.
# This is also done atomically, to avoid race conditions
# with PrimaryNotificationsHandler.notifyDeadlock
try: try:
oid, serial = pop_conflict() oid, serial = pop_conflict()
except KeyError: except KeyError:
return return
data, old_serial, _ = data_dict.pop(oid)
if data is CHECKED_SERIAL:
raise ReadConflictError(oid=oid,
serials=(serial, old_serial))
# TODO: data can be None if a conflict happens during undo
if data:
txn_context.data_size -= len(data)
if self.last_tid < serial:
self.sync() # possible late invalidation (very rare)
try: try:
data, old_serial, _ = data_dict.pop(oid) data = tryToResolveConflict(oid, serial, old_serial, data)
except KeyError: except ConflictError:
assert oid is None, (oid, serial) logging.info(
# Storage refused us from taking object lock, to avoid a 'Conflict resolution failed for %s@%s with %s',
# possible deadlock. TID is actually used for some kind of dump(oid), dump(old_serial), dump(serial))
# "locking priority": when a higher value has the lock, # With recent ZODB, get_pickle_metadata (from ZODB.utils)
# this means we stored objects "too late", and we would # does not support empty values, so do not pass 'data'
# otherwise cause a deadlock. # in this case.
# To recover, we must ask storages to release locks we raise ConflictError(oid=oid, serials=(serial, old_serial),
# hold (to let possibly-competing transactions acquire data=data or None)
# them), and requeue our already-sent store requests. logging.info(
ttid = txn_context.ttid 'Conflict resolution succeeded for %s@%s with %s',
logging.info('Deadlock avoidance triggered for TXN %s' dump(oid), dump(old_serial), dump(serial))
' with new locking TID %s', dump(ttid), dump(serial)) # Mark this conflict as resolved
txn_context.locking_tid = serial resolved_dict[oid] = serial
packet = Packets.AskRebaseTransaction(ttid, serial) # Try to store again
for uuid in txn_context.conn_dict: self._store(txn_context, oid, serial, data)
self._askStorageForWrite(txn_context, uuid, packet)
else:
if data is CHECKED_SERIAL:
raise ReadConflictError(oid=oid,
serials=(serial, old_serial))
# TODO: data can be None if a conflict happens during undo
if data:
txn_context.data_size -= len(data)
if self.last_tid < serial:
self.sync() # possible late invalidation (very rare)
try:
data = tryToResolveConflict(oid, serial, old_serial, data)
except ConflictError:
logging.info(
'Conflict resolution failed for %s@%s with %s',
dump(oid), dump(old_serial), dump(serial))
# With recent ZODB, get_pickle_metadata (from ZODB.utils)
# does not support empty values, so do not pass 'data'
# in this case.
raise ConflictError(oid=oid, serials=(serial, old_serial),
data=data or None)
else:
logging.info(
'Conflict resolution succeeded for %s@%s with %s',
dump(oid), dump(old_serial), dump(serial))
# Mark this conflict as resolved
resolved_dict[oid] = serial
# Try to store again
self._store(txn_context, oid, serial, data)
def _askStorageForWrite(self, txn_context, uuid, packet): def _askStorageForWrite(self, txn_context, uuid, packet):
conn = txn_context.conn_dict[uuid] conn = txn_context.conn_dict[uuid]
...@@ -609,6 +587,7 @@ class Application(ThreadedApplication): ...@@ -609,6 +587,7 @@ class Application(ThreadedApplication):
"""Store current transaction.""" """Store current transaction."""
txn_context = self._txn_container.get(transaction) txn_context = self._txn_container.get(transaction)
self.waitStoreResponses(txn_context) self.waitStoreResponses(txn_context)
txn_context.stored = True
ttid = txn_context.ttid ttid = txn_context.ttid
ext = transaction._extension ext = transaction._extension
ext = dumps(ext, _protocol) if ext else '' ext = dumps(ext, _protocol) if ext else ''
...@@ -640,10 +619,8 @@ class Application(ThreadedApplication): ...@@ -640,10 +619,8 @@ class Application(ThreadedApplication):
if not (uuid in failed or uuid in uuid_set): if not (uuid in failed or uuid in uuid_set):
break break
else: else:
# Very unlikely. Instead of raising, we could recover # Very unlikely. Trying to recover
# the transaction by doing something similar to # is not worth the complexity.
# deadlock avoidance; that would be done before voting.
# But it's not worth the complexity.
raise NEOStorageError( raise NEOStorageError(
'partition %s not fully write-locked' % offset) 'partition %s not fully write-locked' % offset)
failed = [uuid for uuid, running in failed.iteritems() if running] failed = [uuid for uuid, running in failed.iteritems() if running]
......
...@@ -149,13 +149,6 @@ class PrimaryNotificationsHandler(MTEventHandler): ...@@ -149,13 +149,6 @@ class PrimaryNotificationsHandler(MTEventHandler):
if node and node.isConnected(): if node and node.isConnected():
node.getConnection().close() node.getConnection().close()
def notifyDeadlock(self, conn, ttid, locking_tid):
for txn_context in self.app.txn_contexts():
if txn_context.ttid == ttid:
txn_context.conflict_dict[None] = locking_tid
txn_context.wakeup(conn)
break
class PrimaryAnswersHandler(AnswerBaseHandler): class PrimaryAnswersHandler(AnswerBaseHandler):
""" Handle that process expected packets from the primary master """ """ Handle that process expected packets from the primary master """
......
...@@ -28,11 +28,24 @@ from ..transactions import Transaction ...@@ -28,11 +28,24 @@ from ..transactions import Transaction
from ..exception import NEOStorageError, NEOStorageNotFoundError from ..exception import NEOStorageError, NEOStorageNotFoundError
from ..exception import NEOStorageReadRetry, NEOStorageDoesNotExistError from ..exception import NEOStorageReadRetry, NEOStorageDoesNotExistError
@apply
class _DeadlockPacket(object):
handler_method_name = 'notifyDeadlock'
_args = ()
getId = int
class StorageEventHandler(MTEventHandler): class StorageEventHandler(MTEventHandler):
def _acceptIdentification(*args): def _acceptIdentification(*args):
pass pass
def notifyDeadlock(self, conn, ttid, oid):
for txn_context in self.app.txn_contexts():
if txn_context.ttid == ttid:
txn_context.queue.put((conn, _DeadlockPacket, {'oid': oid}))
break
class StorageBootstrapHandler(AnswerBaseHandler): class StorageBootstrapHandler(AnswerBaseHandler):
""" Handler used when connecting to a storage node """ """ Handler used when connecting to a storage node """
...@@ -72,22 +85,28 @@ class StorageAnswersHandler(AnswerBaseHandler): ...@@ -72,22 +85,28 @@ class StorageAnswersHandler(AnswerBaseHandler):
answerCheckCurrentSerial = answerStoreObject answerCheckCurrentSerial = answerStoreObject
def answerRebaseTransaction(self, conn, oid_list): def notifyDeadlock(self, conn, oid):
# To avoid a possible deadlock, storage nodes are waiting for our
# lock to be cancelled, so that a transaction that started earlier
# can complete. This is done by acquiring the lock again.
txn_context = self.app.getHandlerData() txn_context = self.app.getHandlerData()
if txn_context.stored:
return
ttid = txn_context.ttid ttid = txn_context.ttid
queue = txn_context.queue logging.info('Deadlock avoidance triggered for %s:%s',
dump(oid), dump(ttid))
assert conn.getUUID() not in txn_context.data_dict.get(oid, ((),))[-1]
try: try:
for oid in oid_list: # We could have an extra parameter to tell the storage if we
# We could have an extra parameter to tell the storage if we # still have the data, and in this case revert what was done
# still have the data, and in this case revert what was done # in Transaction.written. This would save bandwidth in case of
# in Transaction.written. This would save bandwidth in case of # conflict.
# conflict. conn.ask(Packets.AskRelockObject(ttid, oid),
conn.ask(Packets.AskRebaseObject(ttid, oid), queue=txn_context.queue, oid=oid)
queue=queue, oid=oid)
except ConnectionClosed: except ConnectionClosed:
txn_context.conn_dict[conn.getUUID()] = None txn_context.conn_dict[conn.getUUID()] = None
def answerRebaseObject(self, conn, conflict, oid): def answerRelockObject(self, conn, conflict, oid):
if conflict: if conflict:
txn_context = self.app.getHandlerData() txn_context = self.app.getHandlerData()
serial, conflict, data = conflict serial, conflict, data = conflict
...@@ -105,7 +124,7 @@ class StorageAnswersHandler(AnswerBaseHandler): ...@@ -105,7 +124,7 @@ class StorageAnswersHandler(AnswerBaseHandler):
assert oid in txn_context.data_dict assert oid in txn_context.data_dict
if serial <= txn_context.conflict_dict.get(oid, ''): if serial <= txn_context.conflict_dict.get(oid, ''):
# Another node already reported this conflict or a newer, # Another node already reported this conflict or a newer,
# by answering to this rebase or to the previous store. # by answering to this relock or to the previous store.
return return
# A node has not answered yet to a previous store. Do not wait # A node has not answered yet to a previous store. Do not wait
# it to report the conflict because it may fail before. # it to report the conflict because it may fail before.
...@@ -119,8 +138,8 @@ class StorageAnswersHandler(AnswerBaseHandler): ...@@ -119,8 +138,8 @@ class StorageAnswersHandler(AnswerBaseHandler):
compression, checksum, data = data compression, checksum, data = data
if checksum != makeChecksum(data): if checksum != makeChecksum(data):
raise NEOStorageError( raise NEOStorageError(
'wrong checksum while getting back data for' 'wrong checksum while getting back data for %s:%s'
' object %s during rebase of transaction %s' ' (deadlock avoidance)'
% (dump(oid), dump(txn_context.ttid))) % (dump(oid), dump(txn_context.ttid)))
data = decompress_list[compression](data) data = decompress_list[compression](data)
size = len(data) size = len(data)
......
...@@ -22,19 +22,12 @@ from neo.lib.protocol import Packets ...@@ -22,19 +22,12 @@ from neo.lib.protocol import Packets
from neo.lib.util import dump from neo.lib.util import dump
from .exception import NEOStorageError from .exception import NEOStorageError
@apply
class _WakeupPacket(object):
handler_method_name = 'pong'
_args = ()
getId = int
class Transaction(object): class Transaction(object):
cache_size = 0 # size of data in cache_dict cache_size = 0 # size of data in cache_dict
data_size = 0 # size of data in data_dict data_size = 0 # size of data in data_dict
error = None error = None
locking_tid = None stored = False
voted = False voted = False
ttid = None # XXX: useless, except for testBackupReadOnlyAccess ttid = None # XXX: useless, except for testBackupReadOnlyAccess
lockless_dict = None # {partition: {uuid}} lockless_dict = None # {partition: {uuid}}
...@@ -55,16 +48,13 @@ class Transaction(object): ...@@ -55,16 +48,13 @@ class Transaction(object):
def __repr__(self): def __repr__(self):
error = self.error error = self.error
return ("<%s ttid=%s locking_tid=%s voted=%u" return ("<%s ttid=%s voted=%u"
" #queue=%s #writing=%s #written=%s%s>") % ( " #queue=%s #writing=%s #written=%s%s>") % (
self.__class__.__name__, self.__class__.__name__,
dump(self.ttid), dump(self.locking_tid), self.voted, dump(self.ttid), self.voted,
len(self.queue._queue), len(self.data_dict), len(self.cache_dict), len(self.queue._queue), len(self.data_dict), len(self.cache_dict),
' error=%r' % error if error else '') ' error=%r' % error if error else '')
def wakeup(self, conn):
self.queue.put((conn, _WakeupPacket, {}))
def write(self, app, packet, object_id, **kw): def write(self, app, packet, object_id, **kw):
uuid_list = [] uuid_list = []
pt = app.pt pt = app.pt
...@@ -78,14 +68,6 @@ class Transaction(object): ...@@ -78,14 +68,6 @@ class Transaction(object):
conn = conn_dict[uuid] conn = conn_dict[uuid]
except KeyError: except KeyError:
conn = conn_dict[uuid] = app.getStorageConnection(node) conn = conn_dict[uuid] = app.getStorageConnection(node)
if self.locking_tid and 'oid' in kw:
# A deadlock happened but this node is not aware of it.
# Tell it to write-lock with the same locking tid as
# for the other nodes. The condition on kw is to
# distinguish whether we're writing an oid or
# transaction metadata.
conn.ask(Packets.AskRebaseTransaction(
self.ttid, self.locking_tid), queue=self.queue)
conn.ask(packet, queue=self.queue, **kw) conn.ask(packet, queue=self.queue, **kw)
uuid_list.append(uuid) uuid_list.append(uuid)
except AttributeError: except AttributeError:
...@@ -128,8 +110,8 @@ class Transaction(object): ...@@ -128,8 +110,8 @@ class Transaction(object):
lockless = self.lockless_dict = defaultdict(set) lockless = self.lockless_dict = defaultdict(set)
lockless[app.pt.getPartition(oid)].add(uuid) lockless[app.pt.getPartition(oid)].add(uuid)
if oid in self.conflict_dict: if oid in self.conflict_dict:
# In the case of a rebase, uuid_list may not contain the id # In case of deadlock avoidance, uuid_list may not contain the
# of the node reporting a conflict. # id of the node reporting a conflict.
return return
if uuid_list: if uuid_list:
return return
......
...@@ -322,8 +322,8 @@ class EventQueue(object): ...@@ -322,8 +322,8 @@ class EventQueue(object):
# Stable sort when 2 keys are equal. # Stable sort when 2 keys are equal.
# XXX: Is it really useful to keep events with same key ordered # XXX: Is it really useful to keep events with same key ordered
# chronologically ? The caller could use more specific keys. For # chronologically ? The caller could use more specific keys.
# write-locks (by the storage node), the locking tid seems enough. # For write-locks (by the storage node), the ttid seems enough.
sortQueuedEvents = (lambda key=itemgetter(0): lambda self: sortQueuedEvents = (lambda key=itemgetter(0): lambda self:
self._event_queue.sort(key=key))() self._event_queue.sort(key=key))()
...@@ -334,14 +334,6 @@ class EventQueue(object): ...@@ -334,14 +334,6 @@ class EventQueue(object):
if key is not None: if key is not None:
self.sortQueuedEvents() self.sortQueuedEvents()
def sortAndExecuteQueuedEvents(self):
if self._executing_event < 0:
self.sortQueuedEvents()
self.executeQueuedEvents()
else:
# We can't sort events when they're being processed.
self._executing_event = 1
def executeQueuedEvents(self): def executeQueuedEvents(self):
# Not reentrant. When processing a queued event, calling this method # Not reentrant. When processing a queued event, calling this method
# only tells the caller to retry all events from the beginning, because # only tells the caller to retry all events from the beginning, because
...@@ -369,10 +361,6 @@ class EventQueue(object): ...@@ -369,10 +361,6 @@ class EventQueue(object):
while done: while done:
del queue[done.pop()] del queue[done.pop()]
self._executing_event = 0 self._executing_event = 0
# What sortAndExecuteQueuedEvents could not do immediately
# is done here:
if event[0] is not None:
self.sortQueuedEvents()
self._executing_event = -1 self._executing_event = -1
def logQueuedEvents(self): def logQueuedEvents(self):
......
...@@ -20,7 +20,7 @@ from msgpack import packb ...@@ -20,7 +20,7 @@ from msgpack import packb
# The protocol version must be increased whenever upgrading a node may require # The protocol version must be increased whenever upgrading a node may require
# to upgrade other nodes. # to upgrade other nodes.
PROTOCOL_VERSION = 1 PROTOCOL_VERSION = 2
# By encoding the handshake packet with msgpack, the whole NEO stream can be # By encoding the handshake packet with msgpack, the whole NEO stream can be
# decoded with msgpack. The first byte is 0x92, which is different from TLS # decoded with msgpack. The first byte is 0x92, which is different from TLS
# Handshake (0x16). # Handshake (0x16).
...@@ -532,28 +532,17 @@ class Packets(dict): ...@@ -532,28 +532,17 @@ class Packets(dict):
""") """)
NotifyDeadlock = notify(""" NotifyDeadlock = notify("""
Ask master to generate a new TTID that will be used by the client to A client acquired a write-lock before another one that has a smaller
solve a deadlock by rebasing the transaction on top of concurrent TTID, leading to a possible deadlock. In order to solve it, this asks
changes. the client with the greatest TTID to lock again if it can't vote.
:nodes: S -> M -> C :nodes: S -> C
""") """)
AskRebaseTransaction, AnswerRebaseTransaction = request(""" AskRelockObject, AnswerRelockObject = request("""
Rebase a transaction to solve a deadlock. Relock an object change to solve a deadlock.
:nodes: C -> S :nodes: C -> S
""")
AskRebaseObject, AnswerRebaseObject = request("""
Rebase an object change to solve a deadlock.
:nodes: C -> S
XXX: It is a request packet to simplify the implementation. For more
efficiency, this should be turned into a notification, and the
RebaseTransaction should answered once all objects are rebased
(so that the client can still wait on something).
""", data_path=(1, 0, 2, 0)) """, data_path=(1, 0, 2, 0))
AskStoreObject, AnswerStoreObject = request(""" AskStoreObject, AnswerStoreObject = request("""
......
...@@ -68,9 +68,6 @@ class StorageServiceHandler(BaseServiceHandler): ...@@ -68,9 +68,6 @@ class StorageServiceHandler(BaseServiceHandler):
conn.answer(p) conn.answer(p)
app.pt.updatable(conn.getUUID(), offset_list) app.pt.updatable(conn.getUUID(), offset_list)
def notifyDeadlock(self, conn, *args):
self.app.tm.deadlock(conn.getUUID(), *args)
def answerInformationLocked(self, conn, ttid): def answerInformationLocked(self, conn, ttid):
self.app.tm.lock(ttid, conn.getUUID()) self.app.tm.lock(ttid, conn.getUUID())
......
...@@ -19,15 +19,13 @@ from time import time ...@@ -19,15 +19,13 @@ from time import time
from struct import pack, unpack from struct import pack, unpack
from neo.lib import logging from neo.lib import logging
from neo.lib.handler import DelayEvent, EventQueue from neo.lib.handler import DelayEvent, EventQueue
from neo.lib.protocol import Packets, ProtocolError, uuid_str, \ from neo.lib.protocol import ProtocolError, uuid_str, ZERO_OID, ZERO_TID
ZERO_OID, ZERO_TID
from neo.lib.util import dump, u64, addTID, tidFromTime from neo.lib.util import dump, u64, addTID, tidFromTime
class Transaction(object): class Transaction(object):
""" """
A pending transaction A pending transaction
""" """
locking_tid = ZERO_TID
_tid = None _tid = None
_msg_id = None _msg_id = None
_oid_list = None _oid_list = None
...@@ -292,19 +290,6 @@ class TransactionManager(EventQueue): ...@@ -292,19 +290,6 @@ class TransactionManager(EventQueue):
logging.debug('Begin %s', txn) logging.debug('Begin %s', txn)
return tid return tid
def deadlock(self, storage_id, ttid, locking_tid):
try:
txn = self._ttid_dict[ttid]
except KeyError:
return
if txn.locking_tid <= locking_tid:
client = txn.getNode()
txn.locking_tid = locking_tid = self._nextTID()
logging.info('Deadlock avoidance triggered by %s for %s:'
' new locking tid for TXN %s is %s', uuid_str(storage_id),
uuid_str(client.getUUID()), dump(ttid), dump(locking_tid))
client.send(Packets.NotifyDeadlock(ttid, locking_tid))
def vote(self, app, ttid, uuid_list): def vote(self, app, ttid, uuid_list):
""" """
Check that the transaction can be voted Check that the transaction can be voted
......
...@@ -133,25 +133,23 @@ class ClientOperationHandler(BaseHandler): ...@@ -133,25 +133,23 @@ class ClientOperationHandler(BaseHandler):
compression, checksum, data, data_serial, ttid, time.time()), compression, checksum, data, data_serial, ttid, time.time()),
*e.args) *e.args)
def askRebaseTransaction(self, conn, *args): def askRelockObject(self, conn, ttid, oid):
conn.answer(Packets.AnswerRebaseTransaction(
self.app.tm.rebase(conn, *args)))
def askRebaseObject(self, conn, ttid, oid):
try: try:
self._askRebaseObject(conn, ttid, oid, None) self.app.tm.relockObject(ttid, oid, True)
except DelayEvent, e: except DelayEvent, e:
# locked by a previous transaction, retry later # locked by a previous transaction, retry later
self.app.tm.queueEvent(self._askRebaseObject, self.app.tm.queueEvent(self._askRelockObject,
conn, (ttid, oid, time.time()), *e.args) conn, (ttid, oid, time.time()), *e.args)
else:
conn.answer(Packets.AnswerRelockObject(None))
def _askRebaseObject(self, conn, ttid, oid, request_time): def _askRelockObject(self, conn, ttid, oid, request_time):
conflict = self.app.tm.rebaseObject(ttid, oid) conflict = self.app.tm.relockObject(ttid, oid, False)
if request_time and SLOW_STORE is not None: if request_time and SLOW_STORE is not None:
duration = time.time() - request_time duration = time.time() - request_time
if duration > SLOW_STORE: if duration > SLOW_STORE:
logging.info('RebaseObject delay: %.02fs', duration) logging.info('RelockObject delay: %.02fs', duration)
conn.answer(Packets.AnswerRebaseObject(conflict)) conn.answer(Packets.AnswerRelockObject(conflict))
def askTIDsFrom(self, conn, min_tid, max_tid, length, partition): def askTIDsFrom(self, conn, min_tid, max_tid, length, partition):
conn.answer(Packets.AnswerTIDsFrom(self.app.dm.getReplicationTIDList( conn.answer(Packets.AnswerTIDsFrom(self.app.dm.getReplicationTIDList(
...@@ -251,8 +249,7 @@ class ClientReadOnlyOperationHandler(ClientOperationHandler): ...@@ -251,8 +249,7 @@ class ClientReadOnlyOperationHandler(ClientOperationHandler):
askVoteTransaction = _readOnly askVoteTransaction = _readOnly
askStoreObject = _readOnly askStoreObject = _readOnly
askFinalTID = _readOnly askFinalTID = _readOnly
askRebaseObject = _readOnly askRelockObject = _readOnly
askRebaseTransaction = _readOnly
# takes write lock & is only used when going to commit # takes write lock & is only used when going to commit
askCheckCurrentSerial = _readOnly askCheckCurrentSerial = _readOnly
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
from time import time from time import time
from neo.lib import logging from neo.lib import logging
from neo.lib.handler import DelayEvent, EventQueue from neo.lib.handler import DelayEvent, EventQueue
from neo.lib.util import dump from neo.lib.util import cached_property, dump
from neo.lib.protocol import Packets, ProtocolError, NonReadableCell, \ from neo.lib.protocol import Packets, ProtocolError, NonReadableCell, \
uuid_str, MAX_TID, ZERO_TID uuid_str, MAX_TID, ZERO_TID
...@@ -47,7 +47,6 @@ class Transaction(object): ...@@ -47,7 +47,6 @@ class Transaction(object):
def __init__(self, uuid, ttid): def __init__(self, uuid, ttid):
self._birth = time() self._birth = time()
self.locking_tid = ttid
self.uuid = uuid self.uuid = uuid
self.serial_dict = {} self.serial_dict = {}
self.store_dict = {} self.store_dict = {}
...@@ -56,17 +55,21 @@ class Transaction(object): ...@@ -56,17 +55,21 @@ class Transaction(object):
# the replication is finished. # the replication is finished.
self.lockless = set() self.lockless = set()
@cached_property
def deadlock(self):
# Remember the oids for which we sent a deadlock notification.
return set()
def __repr__(self): def __repr__(self):
return "<%s(%s, locking_tid=%s, tid=%s, age=%.2fs) at 0x%x>" % ( return "<%s(%s, tid=%s, age=%.2fs) at 0x%x>" % (
self.__class__.__name__, self.__class__.__name__,
uuid_str(self.uuid), uuid_str(self.uuid),
dump(self.locking_tid),
dump(self.tid), dump(self.tid),
time() - self._birth, time() - self._birth,
id(self)) id(self))
def __lt__(self, other): def __lt__(self, other):
return self.locking_tid < other.locking_tid raise AssertionError
def logDelay(self, ttid, locked, oid_serial): def logDelay(self, ttid, locked, oid_serial):
if self._delayed.get(oid_serial) != locked: if self._delayed.get(oid_serial) != locked:
...@@ -161,8 +164,7 @@ class TransactionManager(EventQueue): ...@@ -161,8 +164,7 @@ class TransactionManager(EventQueue):
# We sort transactions so that in case of multiple stores/checks # We sort transactions so that in case of multiple stores/checks
# for the same oid, the lock is taken by the highest locking ttid, # for the same oid, the lock is taken by the highest locking ttid,
# which will delay new transactions. # which will delay new transactions.
for txn, ttid in sorted((txn, ttid) for ttid, txn in for ttid, txn in sorted(self._transaction_dict.iteritems()):
self._transaction_dict.iteritems()):
assert txn.lockless.issubset(txn.serial_dict), ( assert txn.lockless.issubset(txn.serial_dict), (
ttid, txn.lockless, txn.serial_dict) ttid, txn.lockless, txn.serial_dict)
for oid in txn.lockless: for oid in txn.lockless:
...@@ -217,84 +219,6 @@ class TransactionManager(EventQueue): ...@@ -217,84 +219,6 @@ class TransactionManager(EventQueue):
except KeyError: except KeyError:
return None return None
def _rebase(self, transaction, ttid, locking_tid=MAX_TID):
# With the default value of locking_tid, this marks the transaction as
# being rebased, in case that the current lock is released (the other
# transaction is aborted or committed) before the client sends us a new
# locking tid: in lockObject, 'locked' will be None but we'll still
# have to delay the store.
transaction.locking_tid = locking_tid
if ttid:
# Remove store locks we have.
# In order to keep all locking data consistent, this must be done
# when the locking tid changes, i.e. from both 'lockObject' (for
# the node that triggered the deadlock) and 'rebase' (for other
# nodes).
for oid, locked in self._store_lock_dict.items():
# If this oid is locked by several transactions (all lockless),
# the following condition is true if we have the highest ttid,
# but in either case, _notifyReplicated will be called below,
# fixing the store lock.
if locked == ttid:
del self._store_lock_dict[oid]
# However here, we don't try to remember lockless writes: later,
# we may give write-locks to oids that would normally conflict.
# Readable cells prevent such scenario to go wrong.
lockless = transaction.lockless
if locking_tid == MAX_TID:
if lockless:
lockless.clear()
self._notifyReplicated()
else:
# There's nothing to rebase for lockless stores to replicating
# partitions because there's no lock taken yet. In other words,
# rebasing such stores would do nothing. Other lockless stores
# become normal ones: this transaction does not block anymore
# replicated partitions from being marked as UP_TO_DATE.
oid = [oid
for oid in lockless
if self.getPartition(oid) not in self._replicating]
if oid:
lockless.difference_update(oid)
self._notifyReplicated()
# Some locks were released, some pending locks may now succeed.
# We may even have delayed stores for this transaction, like the one
# that triggered the deadlock. They must also be sorted again because
# our locking tid has changed.
self.sortAndExecuteQueuedEvents()
def rebase(self, conn, ttid, locking_tid):
self.register(conn, ttid)
transaction = self._transaction_dict[ttid]
if transaction.voted:
raise ProtocolError("TXN %s already voted" % dump(ttid))
# First, get a set copy of serial_dict before _rebase locks oids.
lock_set = set(transaction.serial_dict)
self._rebase(transaction, transaction.locking_tid != MAX_TID and ttid,
locking_tid)
if transaction.locking_tid == MAX_TID:
# New deadlock. There's no point rebasing objects now.
return ()
# We return all oids that can't be relocked trivially
# (the client will use RebaseObject for these oids).
lock_set -= transaction.lockless # see comment in _rebase
recheck_set = lock_set.intersection(self._store_lock_dict)
lock_set -= recheck_set
for oid in lock_set:
try:
serial = transaction.serial_dict[oid]
except KeyError:
# An oid was already being rebased and delayed,
# and it got a conflict during the above call to _rebase.
continue
try:
self.lockObject(ttid, serial, oid)
except ConflictError:
recheck_set.add(oid)
except (DelayEvent, NonReadableCell), e: # pragma: no cover
raise AssertionError(e)
return recheck_set
def vote(self, ttid, txn_info=None): def vote(self, ttid, txn_info=None):
""" """
Store transaction information received from client node Store transaction information received from client node
...@@ -378,15 +302,12 @@ class TransactionManager(EventQueue): ...@@ -378,15 +302,12 @@ class TransactionManager(EventQueue):
# replicate, and we're expected to store it in full. # replicate, and we're expected to store it in full.
# Since there's at least 1 other (readable) cell that will do this # Since there's at least 1 other (readable) cell that will do this
# check, we accept this store/check without taking a lock. # check, we accept this store/check without taking a lock.
if transaction.locking_tid == MAX_TID:
# Deadlock avoidance. Still no new locking_tid from the client.
raise DelayEvent(transaction)
transaction.lockless.add(oid) transaction.lockless.add(oid)
return return
locked = self._store_lock_dict.get(oid) locked = self._store_lock_dict.get(oid)
if locked: if locked:
other = self._transaction_dict[locked] other = self._transaction_dict[locked]
if other < transaction or other.voted: if locked < ttid or other.voted:
# We have a bigger "TTID" than locking transaction, so we are # We have a bigger "TTID" than locking transaction, so we are
# younger: enter waiting queue so we are handled when lock gets # younger: enter waiting queue so we are handled when lock gets
# released. We also want to delay (instead of conflict) if the # released. We also want to delay (instead of conflict) if the
...@@ -399,7 +320,7 @@ class TransactionManager(EventQueue): ...@@ -399,7 +320,7 @@ class TransactionManager(EventQueue):
# but this is not a problem. EventQueue processes them in order # but this is not a problem. EventQueue processes them in order
# and only the last one will not result in conflicts (that are # and only the last one will not result in conflicts (that are
# already resolved). # already resolved).
raise DelayEvent(transaction) raise DelayEvent(ttid)
if oid in transaction.lockless: if oid in transaction.lockless:
# This is a consequence of not having taken a lock during # This is a consequence of not having taken a lock during
# replication. After a ConflictError, we may be asked to "lock" # replication. After a ConflictError, we may be asked to "lock"
...@@ -407,37 +328,34 @@ class TransactionManager(EventQueue): ...@@ -407,37 +328,34 @@ class TransactionManager(EventQueue):
# new transactions. # new transactions.
# For the cluster, we're still out-of-date and like above, # For the cluster, we're still out-of-date and like above,
# at least 1 other (readable) cell checks for conflicts. # at least 1 other (readable) cell checks for conflicts.
# IDEA: If the shared-lock is assigned to us, consider
# reassigning it (hoping it won't be shared anymore),
# and delay (unstoring the previous store may be tricky).
return return
if other is not transaction: if other is not transaction:
# We have a smaller "TTID" than locking transaction, so we are # We have a smaller "TTID" than locking transaction, so we are
# older: this is a possible deadlock case, as we might already # older: this is a possible deadlock case, as we might already
# hold locks the younger transaction is waiting upon. # hold locks the younger transaction is waiting upon.
logging.info('Deadlock on %s:%s with %s', if oid in other.lockless:
dump(oid), dump(ttid), dump(locked)) transaction.lockless.add(oid)
# Ask master to give the client a new locking tid, which will return
# be used to ask all involved storage nodes to rebase the # Do not notify whenever the older transaction
# already locked oids for this transaction. # retries to lock (executeQueuedEvents).
self._app.master_conn.send(Packets.NotifyDeadlock( if oid not in other.deadlock:
ttid, transaction.locking_tid)) other.deadlock.add(oid)
self._rebase(transaction, ttid) logging.info('Deadlock on %s:%s with %s',
raise DelayEvent(transaction) dump(oid), dump(ttid), dump(locked))
# Ask the older transaction to lock again.
# But keep the lock for the moment in case it can vote.
self._app.nm.getByUUID(other.uuid).send(
Packets.NotifyDeadlock(locked, oid))
raise DelayEvent(ttid)
# If previous store was an undo, next store must be based on # If previous store was an undo, next store must be based on
# undo target. # undo target.
try: previous_serial = transaction.store_dict[oid][2]
previous_serial = transaction.store_dict[oid][2]
except KeyError:
# Similarly to below for store, cascaded deadlock for
# checkCurrentSerial is possible because rebase() may return
# oids for which previous rebaseObject are delayed, or being
# received, and the client will bindly resend them.
assert oid in transaction.serial_dict, transaction
logging.info('Transaction %s checking %s more than once',
dump(ttid), dump(oid))
return True
if previous_serial is None: if previous_serial is None:
# 2 valid cases: # The only valid case is when the previous undo resulted in a
# - the previous undo resulted in a resolved conflict # resolved conflict.
# - cascaded deadlock resolution
# Otherwise, this should not happen. For example, when being # Otherwise, this should not happen. For example, when being
# disconnected by the master because we missed a transaction, # disconnected by the master because we missed a transaction,
# a conflict may happen after a first store to us, but the # a conflict may happen after a first store to us, but the
...@@ -447,9 +365,6 @@ class TransactionManager(EventQueue): ...@@ -447,9 +365,6 @@ class TransactionManager(EventQueue):
logging.info('Transaction %s storing %s more than once', logging.info('Transaction %s storing %s more than once',
dump(ttid), dump(oid)) dump(ttid), dump(oid))
return True return True
elif transaction.locking_tid == MAX_TID:
# Deadlock avoidance. Still no new locking_tid from the client.
raise DelayEvent(transaction)
else: else:
try: try:
previous_serial = self._app.dm.getLastObjectTID(oid) previous_serial = self._app.dm.getLastObjectTID(oid)
...@@ -517,24 +432,29 @@ class TransactionManager(EventQueue): ...@@ -517,24 +432,29 @@ class TransactionManager(EventQueue):
if not locked: if not locked:
return ZERO_TID return ZERO_TID
def rebaseObject(self, ttid, oid): def relockObject(self, ttid, oid, unlock):
try: try:
transaction = self._transaction_dict[ttid] transaction = self._transaction_dict[ttid]
except KeyError: except KeyError:
logging.info('Forget rebase of %s by %s delayed by %s', logging.info('Forget relock of %s by %s delayed by %s',
dump(oid), dump(ttid), dump(self.getLockingTID(oid))) dump(oid), dump(ttid), dump(self.getLockingTID(oid)))
return return
try: try:
serial = transaction.serial_dict[oid] serial = transaction.serial_dict[oid]
except KeyError: except KeyError:
# There was a previous rebase for this oid, it was still delayed # This can happen when a partition is dropped.
# during the second RebaseTransaction, and then a conflict was logging.info("no oid %s to relock for transaction %s",
# reported when another transaction was committed.
# This can also happen when a partition is dropped.
logging.info("no oid %s to rebase for transaction %s",
dump(oid), dump(ttid)) dump(oid), dump(ttid))
return return
assert oid not in transaction.lockless, (oid, transaction.lockless) assert oid not in transaction.lockless, (oid, transaction.lockless)
if unlock:
transaction.deadlock.remove(oid)
locked = self._store_lock_dict.pop(oid)
assert locked == ttid, (oid, locked, ttid)
# Now that the lock is released, the younger transaction that triggered
# this deadlock avoidance should be able to lock.
self.executeQueuedEvents()
# And we'll likely be delayed.
try: try:
self.lockObject(ttid, serial, oid) self.lockObject(ttid, serial, oid)
except ConflictError, e: except ConflictError, e:
...@@ -591,8 +511,7 @@ class TransactionManager(EventQueue): ...@@ -591,8 +511,7 @@ class TransactionManager(EventQueue):
try: try:
write_locking_tid = self._store_lock_dict[oid] write_locking_tid = self._store_lock_dict[oid]
except KeyError: except KeyError:
# Lockless store (we are replicating this partition), # Lockless store (we are replicating this partition).
# or unresolved deadlock.
continue continue
if ttid == write_locking_tid: if ttid == write_locking_tid:
del self._store_lock_dict[oid] del self._store_lock_dict[oid]
...@@ -604,11 +523,11 @@ class TransactionManager(EventQueue): ...@@ -604,11 +523,11 @@ class TransactionManager(EventQueue):
if oid in transaction.lockless: if oid in transaction.lockless:
# Several lockless stores for this oid and among them, # Several lockless stores for this oid and among them,
# a higher ttid is still pending. # a higher ttid is still pending.
assert transaction < other, x assert ttid < write_locking_tid, x
# There may remain a single lockless store so we'll need # There may remain a single lockless store so we'll need
# this partition to be checked in _notifyReplicated. # this partition to be checked in _notifyReplicated.
assert self._replicated.get(self.getPartition(oid)), x assert self._replicated.get(self.getPartition(oid)), x
else: # unresolved deadlock else: # delayed relock
assert not locked, x assert not locked, x
# remove the transaction # remove the transaction
del self._transaction_dict[ttid] del self._transaction_dict[ttid]
......
...@@ -26,9 +26,8 @@ AnswerPack(bool) ...@@ -26,9 +26,8 @@ AnswerPack(bool)
AnswerPartitionList(int,int,[[(int,CellStates)]]) AnswerPartitionList(int,int,[[(int,CellStates)]])
AnswerPartitionTable(int,int,[[(int,CellStates)]]) AnswerPartitionTable(int,int,[[(int,CellStates)]])
AnswerPrimary(int) AnswerPrimary(int)
AnswerRebaseObject(?(p64,p64,?(int,bin,bin)))
AnswerRebaseTransaction([p64])
AnswerRecovery(?int,?p64,?p64) AnswerRecovery(?int,?p64,?p64)
AnswerRelockObject(?(p64,p64,?(int,bin,bin)))
AnswerStoreObject(?p64) AnswerStoreObject(?p64)
AnswerStoreTransaction() AnswerStoreTransaction()
AnswerTIDs([p64]) AnswerTIDs([p64])
...@@ -61,9 +60,8 @@ AskPack(p64) ...@@ -61,9 +60,8 @@ AskPack(p64)
AskPartitionList(int,int,?) AskPartitionList(int,int,?)
AskPartitionTable() AskPartitionTable()
AskPrimary() AskPrimary()
AskRebaseObject(p64,p64)
AskRebaseTransaction(p64,p64)
AskRecovery() AskRecovery()
AskRelockObject(p64,p64)
AskStoreObject(p64,p64,int,bin,bin,?p64,?p64) AskStoreObject(p64,p64,int,bin,bin,?p64,?p64)
AskStoreTransaction(p64,bin,bin,bin,[p64]) AskStoreTransaction(p64,bin,bin,bin,[p64])
AskTIDs(int,int,int) AskTIDs(int,int,int)
......
...@@ -36,11 +36,12 @@ from neo.lib.handler import DelayEvent, EventHandler ...@@ -36,11 +36,12 @@ from neo.lib.handler import DelayEvent, EventHandler
from neo.lib import logging from neo.lib import logging
from neo.lib.protocol import (CellStates, ClusterStates, NodeStates, NodeTypes, from neo.lib.protocol import (CellStates, ClusterStates, NodeStates, NodeTypes,
Packets, Packet, uuid_str, ZERO_OID, ZERO_TID, MAX_TID) Packets, Packet, uuid_str, ZERO_OID, ZERO_TID, MAX_TID)
from .. import unpickle_state, Patch, TransactionalResource from .. import Patch, TransactionalResource
from . import ClientApplication, ConnectionFilter, LockLock, NEOCluster, \ from . import ClientApplication, ConnectionFilter, LockLock, NEOCluster, \
NEOThreadedTest, RandomConflictDict, Serialized, ThreadId, with_cluster NEOThreadedTest, RandomConflictDict, Serialized, ThreadId, with_cluster
from neo.lib.util import add64, makeChecksum, p64, u64 from neo.lib.util import add64, makeChecksum, p64, u64
from neo.client.exception import NEOPrimaryMasterLost, NEOStorageError from neo.client.exception import NEOPrimaryMasterLost, NEOStorageError
from neo.client.handlers.storage import _DeadlockPacket
from neo.client.transactions import Transaction from neo.client.transactions import Transaction
from neo.master.handlers.client import ClientServiceHandler from neo.master.handlers.client import ClientServiceHandler
from neo.master.pt import PartitionTable from neo.master.pt import PartitionTable
...@@ -48,7 +49,6 @@ from neo.storage.database import DatabaseFailure ...@@ -48,7 +49,6 @@ from neo.storage.database import DatabaseFailure
from neo.storage.handlers.client import ClientOperationHandler from neo.storage.handlers.client import ClientOperationHandler
from neo.storage.handlers.identification import IdentificationHandler from neo.storage.handlers.identification import IdentificationHandler
from neo.storage.handlers.initialization import InitializationHandler from neo.storage.handlers.initialization import InitializationHandler
from neo.storage.replicator import Replicator
class PCounter(Persistent): class PCounter(Persistent):
value = 0 value = 0
...@@ -279,7 +279,7 @@ class Test(NEOThreadedTest): ...@@ -279,7 +279,7 @@ class Test(NEOThreadedTest):
self.assertEqual(except_list, [DelayEvent]) self.assertEqual(except_list, [DelayEvent])
@with_cluster(storage_count=2, replicas=1) @with_cluster(storage_count=2, replicas=1)
def _testDeadlockAvoidance(self, cluster, scenario): def _testDeadlockAvoidance(self, cluster, scenario, delayed_conflict=None):
except_list = [] except_list = []
delay = threading.Event(), threading.Event() delay = threading.Event(), threading.Event()
ident = get_ident() ident = get_ident()
...@@ -304,6 +304,9 @@ class Test(NEOThreadedTest): ...@@ -304,6 +304,9 @@ class Test(NEOThreadedTest):
delay[c2].wait() delay[c2].wait()
scenario[0] -= 1 scenario[0] -= 1
switch = not scenario[0] switch = not scenario[0]
if c2 and delayed:
f.remove(delayed.pop())
self.assertFalse(delayed)
try: try:
return orig(conn, packet, *args, **kw) return orig(conn, packet, *args, **kw)
finally: finally:
...@@ -324,7 +327,14 @@ class Test(NEOThreadedTest): ...@@ -324,7 +327,14 @@ class Test(NEOThreadedTest):
o1.value += 1 o1.value += 1
o2.value += 2 o2.value += 2
with Patch(TransactionManager, storeObject=onStoreObject), \ delayed = []
if delayed_conflict:
def finish1(*_):
d = f.byPacket(delayed_conflict, lambda _: delayed.append(d))
TransactionalResource(t1, 0, tpc_finish=finish1)
with ConnectionFilter() as f, \
Patch(TransactionManager, storeObject=onStoreObject), \
Patch(MTClientConnection, ask=onAsk): Patch(MTClientConnection, ask=onAsk):
t = self.newThread(t1.commit) t = self.newThread(t1.commit)
t2.commit() t2.commit()
...@@ -343,14 +353,24 @@ class Test(NEOThreadedTest): ...@@ -343,14 +353,24 @@ class Test(NEOThreadedTest):
self.assertEqual(self._testDeadlockAvoidance([2, 4]), self.assertEqual(self._testDeadlockAvoidance([2, 4]),
[DelayEvent, DelayEvent, ConflictError, ConflictError]) [DelayEvent, DelayEvent, ConflictError, ConflictError])
def testDeadlockAvoidance(self): def testDeadlockAvoidance(self, **kw):
# 0: C1 -> S1 # 0: C1 -> S1
# 1: C2 -> S1, S2 (delayed) # 1: C2 -> S1, S2 (delayed)
# 2: C1 -> S2 (deadlock) # 2: C1 -> S2 (deadlock)
# 3: C2 commits # 3: C2 -> S2 (delayed)
# 4: C1 resolves conflict # 4: C1 commits
self.assertEqual(self._testDeadlockAvoidance([1, 3]), # 5: C2 resolves conflict
[DelayEvent, DelayEvent, DelayEvent, ConflictError]) self.assertEqual(self._testDeadlockAvoidance([1, 3], **kw),
[DelayEvent, DelayEvent, ConflictError])
# The following 2 tests cover extra paths when processing
# AnswerRelockObject.
def testDeadlockAvoidanceConflictVsLateRelock(self):
self.testDeadlockAvoidance(delayed_conflict=Packets.AnswerRelockObject)
def testDeadlockAvoidanceRelockVsLateConflict(self):
self.testDeadlockAvoidance(delayed_conflict=Packets.AnswerStoreObject)
@with_cluster() @with_cluster()
def testConflictResolutionTriggered2(self, cluster): def testConflictResolutionTriggered2(self, cluster):
...@@ -2093,7 +2113,11 @@ class Test(NEOThreadedTest): ...@@ -2093,7 +2113,11 @@ class Test(NEOThreadedTest):
self.assertEqual(x.value, 6) self.assertEqual(x.value, 6)
@contextmanager @contextmanager
def thread_switcher(self, threads, order, expected): def thread_switcher(self, threads, order, expected,
# XXX: sched() can be recursive while handling conflicts. In some
# cases, when the test is ending, it seems to work provided
# we ignore a few exceptions.
allow_recurse=False):
self.assertGreaterEqual(len(order), len(expected)) self.assertGreaterEqual(len(order), len(expected))
thread_id = ThreadId() thread_id = ThreadId()
l = [threading.Lock() for l in xrange(len(threads)+1)] l = [threading.Lock() for l in xrange(len(threads)+1)]
...@@ -2103,31 +2127,34 @@ class Test(NEOThreadedTest): ...@@ -2103,31 +2127,34 @@ class Test(NEOThreadedTest):
expected = iter(expected) expected = iter(expected)
def sched(orig, *args, **kw): def sched(orig, *args, **kw):
i = thread_id() i = thread_id()
logging.info('%s: %s%r', i, orig.__name__, args) e = orig.__name__
logging.info('%s: %s%r', i, e, args)
try: try:
x = u64(kw['oid']) e = u64((args[3] if e == '_handlePacket' else kw)['oid'])
except KeyError: except KeyError:
for x in args: for x in args:
if isinstance(x, Packet): if isinstance(x, Packet):
x = type(x).__name__ e = type(x).__name__
break break
else:
x = orig.__name__
try: try:
j = next(order) j = next(order)
except StopIteration: except StopIteration:
end[i].append(x) end[i].append(e)
j = None j = None
try: try:
while 1: while 1:
l.pop().release() try:
l.pop().release()
except threading.ThreadError:
if not allow_recurse:
raise
except IndexError: except IndexError:
pass pass
else: else:
try: try:
self.assertEqual(next(expected), x) self.assertEqual(next(expected), e)
except StopIteration: except StopIteration:
end[i].append(x) end[i].append(e)
try: try:
if callable(j): if callable(j):
with contextmanager(j)(*args, **kw) as j: with contextmanager(j)(*args, **kw) as j:
...@@ -2140,14 +2167,19 @@ class Test(NEOThreadedTest): ...@@ -2140,14 +2167,19 @@ class Test(NEOThreadedTest):
l[j].release() l[j].release()
except threading.ThreadError: except threading.ThreadError:
l[j].acquire() l[j].acquire()
threads[j-1].start() try:
if x != 'AskStoreTransaction': threads[j-1].start()
except RuntimeError:
if not allow_recurse:
raise
l[j].release()
if e != 'AskStoreTransaction':
try: try:
l[i].acquire() l[i].acquire()
except IndexError: except IndexError:
pass pass
def _handlePacket(orig, *args): def _handlePacket(orig, *args):
if isinstance(args[2], Packets.AnswerRebaseTransaction): if args[2] is _DeadlockPacket:
return sched(orig, *args) return sched(orig, *args)
return orig(*args) return orig(*args)
with RandomConflictDict, \ with RandomConflictDict, \
...@@ -2167,184 +2199,71 @@ class Test(NEOThreadedTest): ...@@ -2167,184 +2199,71 @@ class Test(NEOThreadedTest):
t1, c1 = cluster.getTransaction() t1, c1 = cluster.getTransaction()
r = c1.root() r = c1.root()
oids = [] oids = []
for x in 'abcd': for x in 'abc':
r[x] = PCounterWithResolution() r[x] = PCounterWithResolution()
t1.commit() t1.commit()
oids.append(u64(r[x]._p_oid)) oids.append(u64(r[x]._p_oid))
# The test relies on the implementation-defined behavior that ZODB # The test relies on the implementation-defined behavior that ZODB
# processes oids by order of registration. It's also simpler with # processes oids by order of registration. It's also simpler with
# oids from a=1 to d=4. # oids from a=1 to c=3.
self.assertEqual(oids, range(1, 5)) self.assertEqual(oids, [1, 2, 3])
t2, c2 = cluster.getTransaction() t2, c2 = cluster.getTransaction()
t3, c3 = cluster.getTransaction() t3, c3 = cluster.getTransaction()
changes(r, c2.root(), c3.root()) changes(r, c2.root(), c3.root())
threads = map(self.newPausedThread, (t2.commit, t3.commit)) threads = map(self.newPausedThread, (t2.commit, t3.commit))
with self.thread_switcher(threads, order, expected_packets) as end: with self.thread_switcher(threads, order, expected_packets) as end:
t1.commit() t1.commit()
threads[0].join()
if except2 is None: if except2 is None:
threads[0].join() threads[1].join()
else: else:
self.assertRaises(except2, threads[0].join) self.assertRaises(except2, threads[1].join)
threads[1].join()
t3.begin() t3.begin()
r = c3.root() r = c3.root()
self.assertEqual(expected_values, [r[x].value for x in 'abcd']) self.assertEqual(expected_values, [r[x].value for x in 'abc'])
return dict(end) return dict(end)
def testCascadedDeadlockAvoidanceWithOneStorage1(self): def _testConflictDuringDeadlockAvoidance(self, change):
"""
locking tids: t1 < t2 < t3
1. A2 (t2 stores A)
2. B1, c2 (t2 checks C)
3. A3 (delayed), B3 (delayed), D3 (delayed)
4. C1 -> deadlock: B3
5. d2 -> deadlock: A3
locking tids: t3 < t1 < t2
6. t3 commits
7. t2 rebase: conflicts on A and D
8. t1 rebase: new deadlock on C
9. t2 aborts (D non current)
all locks released for t1, which rebases and resolves conflicts
"""
def changes(r1, r2, r3): def changes(r1, r2, r3):
r1['b'].value += 1 r1['a'].value = 1
r1['c'].value += 2 self.readCurrent(r1['c'])
r2['a'].value += 3 r2['b'].value = 2
self.readCurrent(r2['c']) r2['c'].value = 3
self.readCurrent(r2['d']) r3['b'].value = 4
r3['a'].value += 4 change(r3['a'])
r3['b'].value += 5
r3['d'].value += 6
x = self._testComplexDeadlockAvoidanceWithOneStorage(changes,
(1, 1, 0, 1, 2, 2, 2, 2, 0, 1, 2, 1, 0, 0, 1, 0, 0, 1),
('tpc_begin', 'tpc_begin', 1, 2, 3, 'tpc_begin', 1, 2, 4, 3, 4,
'AskStoreTransaction', 'AskRebaseTransaction',
'AskRebaseTransaction', 'AnswerRebaseTransaction',
'AnswerRebaseTransaction', 'AskRebaseTransaction',
'AnswerRebaseTransaction'),
[4, 6, 2, 6])
try:
x[1].remove(1)
except ValueError:
pass
self.assertEqual(x, {0: [2, 'AskStoreTransaction'], 1: ['tpc_abort']})
def testCascadedDeadlockAvoidanceWithOneStorage2(self):
def changes(r1, r2, r3):
r1['a'].value += 1
r1['b'].value += 2
r1['c'].value += 3
r2['a'].value += 4
r3['b'].value += 5
r3['c'].value += 6
self.readCurrent(r2['c'])
self.readCurrent(r2['d'])
self.readCurrent(r3['d'])
def unlock(orig, *args):
f.remove(rebase)
return orig(*args)
rebase = f.delayAskRebaseTransaction(
Patch(TransactionManager, unlock=unlock))
with ConnectionFilter() as f:
x = self._testComplexDeadlockAvoidanceWithOneStorage(changes,
(0, 1, 1, 0, 1, 2, 2, 2, 2, 0, 1, 2, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1),
('tpc_begin', 1, 'tpc_begin', 1, 2, 3, 'tpc_begin',
2, 3, 4, 3, 4, 'AskStoreTransaction', 'AskRebaseTransaction',
'AskRebaseTransaction', 'AnswerRebaseTransaction'),
[1, 7, 9, 0])
x[0].sort(key=str)
try:
x[1].remove(1)
except ValueError:
pass
self.assertEqual(x, {
0: [2, 3, 'AnswerRebaseTransaction',
'AskRebaseTransaction', 'AskStoreTransaction'],
1: ['AnswerRebaseTransaction','AskRebaseTransaction',
'AnswerRebaseTransaction', 'tpc_abort'],
})
def testCascadedDeadlockAvoidanceOnCheckCurrent(self):
"""Transaction checking an oid more than once
1. t1 < t2
2. t1 deadlocks, t2 gets all locks
3. t2 < t1 < t3
4. t2 finishes: conflict on A, t1 locks C, t3 locks B
5. t1 rebases B -> second deadlock
6. t1 resolves A
7. t3 resolves A -> deadlock, and t1 locks B
8. t1 rebases B whereas it was already locked
"""
def changes(*r):
for r in r:
r['a'].value += 1
self.readCurrent(r['b'])
self.readCurrent(r['c'])
t = []
def vote_t2(*args, **kw):
yield 0
t.append(threading.currentThread())
def tic_t1(*args, **kw):
# Make sure t2 finishes before rebasing B,
# so that B is locked by a newer transaction (t3).
t[0].join()
yield 0
end = self._testComplexDeadlockAvoidanceWithOneStorage(changes, end = self._testComplexDeadlockAvoidanceWithOneStorage(changes,
(0, 1, 1, 0, 1, 1, 0, 0, 2, 2, 2, 2, 1, vote_t2, tic_t1), (1, 2, 2, 2, 1, 1, 2, 0, 0, 1, 2, 0, 2, 1),
('tpc_begin', 1) * 2, [3, 0, 0, 0], None) ('tpc_begin', 'tpc_begin', 'tpc_begin', 2, 1, 2, 3, 2, 1, 3, 3, 1,
self.assertLessEqual(2, end[0].count('AskRebaseTransaction')) 'AskStoreTransaction', 'tpc_abort'),
[1, 2, 3], POSException.ConflictError)
self.assertEqual(end, {1: ['AskStoreTransaction']})
def testFailedConflictOnBigValueDuringDeadlockAvoidance(self): def testFailedConflictOnBigValueDuringDeadlockAvoidance(self):
def changes(r1, r2, r3): @self._testConflictDuringDeadlockAvoidance
r1['b'].value = 1 def change(ob):
r1['d'].value = 2 ob.value = '*' * ob._p_jar.db().storage._cache.max_size
r2['a'].value = '*' * r2._p_jar.db().storage._cache.max_size
r2['b'].value = 3 def testFailedCheckCurrentDuringDeadlockAvoidance(self):
r2['c'].value = 4 self._testConflictDuringDeadlockAvoidance(self.readCurrent)
r3['a'].value = 5
self.readCurrent(r3['c']) @with_cluster(replicas=1, partitions=2)
self.readCurrent(r3['d'])
with ConnectionFilter() as f:
x = self._testComplexDeadlockAvoidanceWithOneStorage(changes,
(1, 1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 1, 1, 1, 0),
('tpc_begin', 'tpc_begin', 1, 2, 'tpc_begin', 1, 3, 3, 4,
'AskStoreTransaction', 2, 4, 'AskRebaseTransaction',
'AnswerRebaseTransaction', 'tpc_abort'),
[5, 1, 0, 2], POSException.ConflictError)
self.assertEqual(x, {0: ['AskStoreTransaction']})
@with_cluster(replicas=1, partitions=4)
def testNotifyReplicated(self, cluster): def testNotifyReplicated(self, cluster):
""" """
Check replication while several concurrent transactions leads to Check replication while several concurrent transactions leads to
conflict resolutions and deadlock avoidances, and in particular the conflict resolutions and deadlock avoidances, and in particular the
handling of write-locks when the storage node is about to notify the handling of write-locks when the storage node is about to notify the
master that partitions are replicated. master that partitions are replicated.
Transactions are committed in the following order:
- t2 About releasing shared write-locks:
- t4, conflict on 'd' - The test was originally written with only t1 & t3, covering only the
- t1, deadlock on 'a' case of not releasing a write-lock that is owned by a higher TTID.
- t3, deadlock on 'b', and 2 conflicts on 'a' - t4 was then added to test the opposite case ('a' on s1 by t3 & t4,
Special care is also taken for the change done by t3 on 'a', to check and t4 finishes first): the write-lock is released but given
that the client resolves conflicts with correct oldSerial: immediately after while checking for replicated partitions.
1. The initial store (a=8) is first delayed by t2.
2. It is then kept aside by the deadlock. At last, t2 was added to trigger a deadlock when t3 is about to vote:
3. On s1, deadlock avoidance happens after t1 stores a=7 and the store - the notification from s0 is ignored
is delayed again. However, it's the contrary on s0, and a conflict - s1 does not notify because 'a' is still lockless
is reported to the client.
4. Second store (a=12) based on t2.
5. t1 finishes and s1 reports the conflict for first store (with t1).
At that point, the base serial of this store is meaningless:
the client only has data for last store (based on t2), and it's its
base serial that must be used. t3 write 15 (and not 19 !).
6. Conflicts for the second store are with t2 and they're ignored
because they're already resolved.
Note that this test method lacks code to enforce some events to happen
in the expected order. Sometimes, the above scenario is not reproduced
entirely, but it's so rare that there's no point in making the code
further complicated.
""" """
s0, s1 = cluster.storage_list s0, s1 = cluster.storage_list
s1.stop() s1.stop()
...@@ -2352,96 +2271,73 @@ class Test(NEOThreadedTest): ...@@ -2352,96 +2271,73 @@ class Test(NEOThreadedTest):
s1.resetNode() s1.resetNode()
t1, c1 = cluster.getTransaction() t1, c1 = cluster.getTransaction()
r = c1.root() r = c1.root()
for x in 'abcd': for x in 'abc':
r[x] = PCounterWithResolution() r[x] = PCounterWithResolution()
t1.commit() t1.commit()
t3, c3 = cluster.getTransaction() t3, c3 = cluster.getTransaction()
r['c'].value += 1 r['a'].value += 1
t1.commit() t1.commit()
r['b'].value += 2 r['a'].value += 2
r['a'].value += 3 r['c'].value += 3
t2, c2 = cluster.getTransaction() t2, c2 = cluster.getTransaction()
r = c2.root() r = c2.root()
r['a'].value += 4 r['a'].value += 4
r['c'].value += 5
r['d'].value += 6
r = c3.root() r = c3.root()
r['c'].value += 7 r['b'].value += 5
r['a'].value += 8 r['c'].value += 6
r['b'].value += 9 r['a'].value += 7
t4, c4 = cluster.getTransaction() t4, c4 = cluster.getTransaction()
r = c4.root() r = c4.root()
r['d'].value += 10 r['a'].value += 8
threads = map(self.newPausedThread, (t2.commit, t3.commit, t4.commit)) threads = map(self.newPausedThread, (t2.commit, t3.commit, t4.commit))
def t3_c(*args, **kw): deadlocks = [(3, False)] # by t1
yield 1 def t3_relock(*args, **kw):
# We want to resolve the conflict before storing A. self.assertPartitionTable(cluster, 'UO|UO')
self.tic()
def t3_resolve(*args, **kw):
self.assertPartitionTable(cluster, 'UO|UO|UO|UO')
f.remove(delay) f.remove(delay)
self.tic() self.tic()
self.assertPartitionTable(cluster, 'UO|UO|UU|UO') self.assertPartitionTable(cluster, 'UU|UO')
yield yield 0
def t1_rebase(*args, **kw): def t2_store(*args, **kw):
self.tic() self.assertFalse(deadlocks)
self.assertPartitionTable(cluster, 'UO|UU|UU|UO') deadlocks.append((1, True))
yield yield 3
def t3_b(*args, **kw): def t4_vote(*args, **kw):
yield 1
self.tic()
self.assertPartitionTable(cluster, 'UO|UU|UU|UU')
def t4_d(*args, **kw):
self.tic()
self.assertPartitionTable(cluster, 'UU|UU|UU|UU')
yield 2 yield 2
# Delay the conflict for the second store of 'a' by t3. f.remove(delayDeadlock)
delay_conflict = {s0.uuid: [1], s1.uuid: [1,0]} self.assertFalse(deadlocks)
def delayConflict(conn, packet): def delayDeadlock(conn, packet):
app = self.getConnectionApp(conn) if isinstance(packet, Packets.NotifyDeadlock):
if (isinstance(packet, Packets.AnswerStoreObject) self.assertEqual(self.getConnectionApp(conn).uuid, s0.uuid)
and packet._args[0]): oid, delay = deadlocks.pop()
conn, = cluster.client.getConnectionList(app) self.assertEqual(u64(packet._args[1]), oid)
kw = conn._handlers._pending[0][0][packet._id][1] return delay
return 1 == u64(kw['oid']) and delay_conflict[app.uuid].pop()
def writeA(orig, txn_context, oid, serial, data):
if u64(oid) == 1:
value = unpickle_state(data)['value']
if value > 12:
f.remove(delayConflict)
elif value == 12:
f.add(delayConflict)
return orig(txn_context, oid, serial, data)
###
with ConnectionFilter() as f, \ with ConnectionFilter() as f, \
Patch(cluster.client, _store=writeA), \
self.thread_switcher(threads, self.thread_switcher(threads,
(1, 2, 3, 0, 1, 0, 2, t3_c, 1, 3, 2, t3_resolve, 0, 0, 0, (1, 2, 3, 2, 2, 2, 0, 3, 0, 2, t3_relock, 3,
t1_rebase, 2, t3_b, 3, t4_d, 0, 2, 2), 1, t2_store, t4_vote, 2, 2),
('tpc_begin', 'tpc_begin', 'tpc_begin', 'tpc_begin', ('tpc_begin', 'tpc_begin', 'tpc_begin', 'tpc_begin',
2, 1, 1, 3, 3, 4, 4, 3, 1, 2, 3, 1, 1, 1, 3, 3, 'AskStoreTransaction',
'AskRebaseTransaction', 'AskRebaseTransaction', 1, 1, 'AskStoreTransaction'), allow_recurse=True) as end:
'AnswerRebaseTransaction', 'AnswerRebaseTransaction', 2
)) as end:
delay = f.delayAskFetchTransactions() delay = f.delayAskFetchTransactions()
f.add(delayDeadlock)
s1.start() s1.start()
self.tic() self.tic()
t1.commit() t1.commit()
for t in threads: for t in threads:
t.join() t.join()
t4.begin() t4.begin()
self.assertEqual([15, 11, 13, 16], [r[x].value for x in 'abcd']) self.assertPartitionTable(cluster, 'UU|UU')
self.assertEqual([2, 2], map(end.pop(2).count, self.assertEqual([22, 5, 9], [r[x].value for x in 'abc'])
['AskRebaseTransaction', 'AnswerRebaseTransaction'])) self.assertEqual(end.pop(3), [1])
self.assertEqual(end, { self.assertEqual(sorted(end), [1, 2])
0: [1, 'AskStoreTransaction'],
1: ['AskStoreTransaction'],
3: [4, 'AskStoreTransaction'],
})
self.assertFalse(s1.dm.getOrphanList()) self.assertFalse(s1.dm.getOrphanList())
@with_cluster(replicas=1) @with_cluster(replicas=1)
def testNotifyReplicated2(self, cluster): def testNotifyReplicated2(self, cluster):
"""
See comment about using 'discard' instead of 'remove'
in TransactionManager._notifyReplicated of the storage node.
"""
s0, s1 = cluster.storage_list s0, s1 = cluster.storage_list
s1.stop() s1.stop()
cluster.join((s1,)) cluster.join((s1,))
...@@ -2452,10 +2348,9 @@ class Test(NEOThreadedTest): ...@@ -2452,10 +2348,9 @@ class Test(NEOThreadedTest):
r[x] = PCounterWithResolution() r[x] = PCounterWithResolution()
t1.commit() t1.commit()
r['a'].value += 1 r['a'].value += 1
r['b'].value += 2
t2, c2 = cluster.getTransaction() t2, c2 = cluster.getTransaction()
r = c2.root() r = c2.root()
r['a'].value += 3 r['a'].value += 2
r['b'].value += 4 r['b'].value += 4
thread = self.newPausedThread(t2.commit) thread = self.newPausedThread(t2.commit)
def t2_b(*args, **kw): def t2_b(*args, **kw):
...@@ -2464,17 +2359,15 @@ class Test(NEOThreadedTest): ...@@ -2464,17 +2359,15 @@ class Test(NEOThreadedTest):
self.tic() self.tic()
self.assertPartitionTable(cluster, 'UO') self.assertPartitionTable(cluster, 'UO')
yield 0 yield 0
def t2_vote(*args, **kw): def t1_vote(*args, **kw):
self.tic() self.tic()
self.assertPartitionTable(cluster, 'UU') self.assertPartitionTable(cluster, 'UO')
yield 0 yield 1
with ConnectionFilter() as f, \ with ConnectionFilter() as f, \
self.thread_switcher((thread,), self.thread_switcher((thread,),
(1, 0, 1, 1, t2_b, 0, 0, 1, t2_vote, 0, 0), (1, 0, 1, 1, t2_b, t1_vote, 1, 1),
('tpc_begin', 'tpc_begin', 1, 1, 2, 2, ('tpc_begin', 'tpc_begin', 1, 1, 2, 'AskStoreTransaction',
'AskRebaseTransaction', 'AskRebaseTransaction', 1, 'AskStoreTransaction',
'AskStoreTransaction',
'AnswerRebaseTransaction', 'AnswerRebaseTransaction',
)) as end: )) as end:
delay = f.delayAskFetchTransactions() delay = f.delayAskFetchTransactions()
s1.start() s1.start()
...@@ -2482,164 +2375,39 @@ class Test(NEOThreadedTest): ...@@ -2482,164 +2375,39 @@ class Test(NEOThreadedTest):
t1.commit() t1.commit()
thread.join() thread.join()
t2.begin() t2.begin()
self.assertEqual([4, 6], [r[x].value for x in 'ab']) self.assertPartitionTable(cluster, 'UU')
self.assertEqual([3, 4], [r[x].value for x in 'ab'])
self.assertFalse(end)
@with_cluster(replicas=1, partitions=2) @with_cluster(replicas=1)
def testNotifyReplicated3(self, cluster): def testLateLocklessWrite(self, cluster):
s0, s1 = cluster.storage_list s0, s1 = cluster.storage_list
t1, c1 = cluster.getTransaction()
r = c1.root()
r[''] = PCounter()
t1.commit()
s1.stop() s1.stop()
cluster.join((s1,)) cluster.join((s1,))
s1.resetNode() s1.resetNode()
nonlocal_ = [0]
class Abort(Exception):
pass
with cluster.newClient(1) as db, Patch(Replicator,
_nextPartitionSortKey=lambda orig, self, offset: offset):
t3, c3 = cluster.getTransaction(db)
with ConnectionFilter() as f, self.noConnection(c3, s0):
@f.delayAnswerFetchObjects
def delay(_):
if nonlocal_:
return nonlocal_.pop()
s1.start()
self.tic()
r[''].value += 1
r._p_changed = 1
t2, c2 = cluster.getTransaction()
c2.root()._p_changed = 1
def t1_rebase(*args, **kw):
self.tic()
f.remove(delay)
yield 0
@self.newPausedThread
def commit23():
t2.commit()
c3.root()[''].value += 3
with self.assertRaises(Abort) as cm:
t3.commit()
self.assertTrue(*cm.exception.args)
def t3_commit(txn):
# Check that the storage hasn't answered to the store,
# which means that a lock is still taken for r[''] by t1.
self.tic()
try:
txn = txn.data(c3)
except (AttributeError, KeyError): # BBB: ZODB < 5
pass
txn_context = db.storage.app._txn_container.get(txn)
raise Abort(txn_context.queue.empty())
TransactionalResource(t3, 1, commit=t3_commit)
with self.thread_switcher((commit23,),
(1, 1, 0, 0, t1_rebase, 0, 0, 0, 1, 1, 1, 1, 0),
('tpc_begin', 'tpc_begin', 0, 1, 0,
'AskRebaseTransaction', 'AskRebaseTransaction',
'AnswerRebaseTransaction', 'AnswerRebaseTransaction',
'AskStoreTransaction', 'tpc_begin', 1, 'tpc_abort',
)) as end:
self.assertRaises(POSException.ConflictError, t1.commit)
commit23.join()
self.assertEqual(end, {0: ['tpc_abort']})
self.assertPartitionTable(cluster, 'UU|UU')
@with_cluster(partitions=2, storage_count=2)
def testConflictAfterLocklessWrite(self, cluster):
"""
Show that in case of a write to an outdated cell, the client must
discard the answer if it comes after a resolved conflict, as the client
would not have the data anymore to solve a second conflict (deadlock
avoidance). This test reproduces a case where the storage node can't
easily return the correct data back to the client.
The scenario focuses on object A (oid 1) and node s0, which is
initially replicating partition 1:
1. t1 writes A: s1 conflicts and the answer from s0 is delayed
2. t1 writes B: a deadlock is triggered by s0 and internally, the write
of A is not considered lockless anymore
3. replication of partition 1 finishes: A is marked as locked normally
(which can be considered wrong but discarding the write at that
point is not trivial and anyway another write is coming)
4. t1 resolves A: s1 is not yet notified of the deadlock and accepts
5. t1 receives the answer for the first write of A to s1: if it didn't
discard it, it would mark the write of A as completed on all nodes
6. t1 starts resolving the deadlock, s0 conflicts for the second store
and returns that A needs to be rebased (like in 3, questionable)
7. the answer of s0 for the rebasing of A contains data from the first
write and it is processed first: this is not an issue if the client
still has the data (i.e. not moved to transaction cache, or
discarded because the cache is too small)
"""
t1, c1 = cluster.getTransaction()
r = c1.root()
for x in 'ab':
r[x] = PCounterWithResolution()
t1.commit()
cluster.neoctl.setNumReplicas(1)
self.tic()
s0, s1 = cluster.sortStorageList()
t1, c1 = cluster.getTransaction() t1, c1 = cluster.getTransaction()
r = c1.root() ob = c1.root()[''] = PCounterWithResolution()
r['a'].value += 1 t1.commit()
r['b'].value += 2 ob.value += 1
with cluster.newClient(1) as db, ConnectionFilter() as f: t2, c2 = cluster.getTransaction()
delayReplication = f.delayAnswerFetchObjects() ob = c2.root()['']
delayStore = f.delayAnswerStoreObject(lambda conn: ob.value += 2
conn.uuid == cluster.client.uuid and t2.commit()
self.getConnectionApp(conn) is s0) def resolve(orig, *args):
delayDeadlock = f.delayNotifyDeadlock() f.remove(d)
delayRebase = f.delayAnswerRebaseObject(lambda conn: return orig(*args)
# to first process the one from s0 with ConnectionFilter() as f, \
Patch(PCounterWithResolution, _p_resolveConflict=resolve):
f.delayAskFetchTransactions()
d = f.delayAnswerStoreObject(lambda conn:
self.getConnectionApp(conn) is s1) self.getConnectionApp(conn) is s1)
cluster.neoctl.tweakPartitionTable() s1.start()
self.tic() self.tic()
t2, c2 = cluster.getTransaction(db) t1.commit()
r = c2.root() t2.begin()
r['a'].value += 3 # for a first conflict on t1/s1 self.tic()
t2.commit() self.assertEqual(ob.value, 3)
r['b'].value += 4 # for a deadlock on t1/s0 self.assertPartitionTable(cluster, 'UU')
r['a'].value += 5 # for a second conflict on t1/s0
def t1_b(*args, **kw):
self.tic() # make sure t2/b will be processed before t1/b
yield 0
def t1_resolve(*args, **kw):
f.remove(delayReplication)
self.tic()
yield 1
f.remove(delayStore)
self.tic()
f.remove(delayDeadlock)
def t2_vote(*args, **kw):
yield 0
# From now on, prefer reading from s0,
# in case that packets from s1 are blocked by the filter.
no_read.append(s1.uuid)
def t1_end(*args, **kw):
yield 0
f.remove(delayRebase)
commit2 = self.newPausedThread(t2.commit)
no_read = []
with cluster.client.extraCellSortKey(
lambda cell: cell.getUUID() in no_read), \
self.thread_switcher((commit2,),
(1, 1, 0, 0, t1_b, t1_resolve, 0, 0, 0, 0, 1, t2_vote, t1_end),
('tpc_begin', 'tpc_begin', 2, 1, 2, 1, 1,
'AskRebaseTransaction', 'AskRebaseTransaction',
'AnswerRebaseTransaction', 'AnswerRebaseTransaction',
'AskStoreTransaction')) as end:
t1.commit()
commit2.join()
t1.begin()
r = c1.root()
self.assertEqual(r['a'].value, 9)
self.assertEqual(r['b'].value, 6)
t1 = end.pop(0)
self.assertEqual(t1.pop(), 'AskStoreTransaction')
self.assertEqual(sorted(t1), [1, 2])
self.assertFalse(end)
self.assertPartitionTable(cluster, 'UU|UU')
@with_cluster(start_cluster=0, storage_count=2, replicas=1) @with_cluster(start_cluster=0, storage_count=2, replicas=1)
def testLocklessWriteDuringConflictResolution(self, cluster): def testLocklessWriteDuringConflictResolution(self, cluster):
...@@ -2651,39 +2419,35 @@ class Test(NEOThreadedTest): ...@@ -2651,39 +2419,35 @@ class Test(NEOThreadedTest):
work in itself, the code was hazardous and the client can't easily work in itself, the code was hazardous and the client can't easily
discard such "positive" answers, or process them early enough. discard such "positive" answers, or process them early enough.
The scenario focuses on transaction t1 (storing oid 1) The scenario focuses on transaction t2 (storing oid 1)
and node s1 (initially replicating the only partition): and node s1 (initially replicating the only partition):
1. t1 stores: conflict on s0, lockless write on s1 1. t2 stores: conflict on s0, lockless write on s1
2. t2 stores: locked on s0, lockless write on s1 2. t1 stores: locked on s0, lockless write on s1
3. t1 resolves: deadlock on s0, packet to s1 delayed 3. t2 resolves: waiting for write-lock on s0, packet to s1 delayed
4. t2 commits: on s1, a single lockless write remains and the storage 4. Partition 0 replicated. Multiple lockless writes block notification
node notifies the master that it is UP_TO_DATE to master.
5. s1 receives the second store from t1: answer delayed 5. t1 commits: t2 conflicts on s0, a single lockless write remains
6. t2 begins a new transaction on s1, and s1 notifies the master that it is UP_TO_DATE
7. t1 resolves the deadlock: conflict on s0, s1 asks to rebase 6. s1 receives the second store from t2: answer delayed
8. t2 stores and vote 7. while t2 is resolving the conflict: answer from s1 processed, s0 down
9. s0 down 8. conflict on s1
10. while t2 finishes, t1 starts solving the conflict and due to the 9. t2 resolves a last time and votes
way packets are processed, it proceeds as follows:
a. answer in step 5 is received but not processed
b. data asked, but not for the last serial
(so there will be yet another conflict to solve)
c. new data is stored: client waiting for s1
d. answer in step 5 is processed
What happened before: What happened before:
4. t1 still has the lock 5. t1 still has the lock
5. locked ("Transaction storing more than once") 6. locked ("Transaction storing more than once")
10d. store considered successful, and the data won't be there anymore 9. t2 already processed a successful store from s1 and crashes
for the actual conflict (KeyError in Transaction.written)
-> assertion failure
Now that the storage nodes discard lockless writes that actually Now that the storage nodes discard lockless writes that actually
conflict: conflict:
4. t1 does not have the lock anymore 5. t1 does not have the lock anymore
5. conflict 6. conflict
10d. ignored (conflict already resolved) 9. just after vote, t2 aborts because the only storage node that's
-> transaction aborted normally RUNNING from the beginning got lockless writes (note that the client
currently tracks them as a list of partitions: if it was a list
of oids, it could finish the transaction because oid 1 ended up
being stored normally)
""" """
s0, s1 = cluster.storage_list s0, s1 = cluster.storage_list
cluster.start(storage_list=(s0,)) cluster.start(storage_list=(s0,))
...@@ -2692,13 +2456,14 @@ class Test(NEOThreadedTest): ...@@ -2692,13 +2456,14 @@ class Test(NEOThreadedTest):
t1.commit() t1.commit()
x1.value += 1 x1.value += 1
with cluster.newClient(1) as db, ConnectionFilter() as f: with cluster.newClient(1) as db, ConnectionFilter() as f:
client = db.storage.app
delayReplication = f.delayAnswerFetchObjects() delayReplication = f.delayAnswerFetchObjects()
delayed = [] delayed = []
delayStore = f.delayAskStoreObject(lambda conn: delayStore = f.delayAskStoreObject(lambda conn:
conn.uuid in delayed and conn.uuid in delayed and
self.getConnectionApp(conn) is cluster.client) self.getConnectionApp(conn) is client)
delayStored = f.delayAnswerStoreObject(lambda conn: delayStored = f.delayAnswerStoreObject(lambda conn:
conn.uuid == cluster.client.uuid and conn.uuid == client.uuid and
self.getConnectionApp(conn).uuid in delayed) self.getConnectionApp(conn).uuid in delayed)
def load(orig, oid, at_tid, before_tid): def load(orig, oid, at_tid, before_tid):
if delayed: if delayed:
...@@ -2716,38 +2481,36 @@ class Test(NEOThreadedTest): ...@@ -2716,38 +2481,36 @@ class Test(NEOThreadedTest):
t2, c2 = cluster.getTransaction(db) t2, c2 = cluster.getTransaction(db)
x2 = c2.root()[''] x2 = c2.root()['']
x2.value += 2 x2.value += 2
t2.commit() t1.commit()
x2.value += 4 x1.value += 4
def tic1(*args, **kw): def tic2(*args, **kw):
yield 1 yield 0
self.tic() self.tic()
def t1_resolve(*args, **kw): def t2_resolve(*args, **kw):
delayed.append(s1.uuid) delayed.append(s1.uuid)
f.remove(delayReplication) f.remove(delayReplication)
self.tic() self.tic()
yield 1
self.tic()
def t2_begin(*args, **kw):
f.remove(delayStore)
yield 0 yield 0
@self.newPausedThread self.tic()
def commit2(): commit2 = self.newPausedThread(t2.commit)
t2.commit() with Patch(client, _loadFromStorage=load) as p, \
x2.value += 8
t2.commit()
with Patch(cluster.client, _loadFromStorage=load) as p, \
self.thread_switcher((commit2,), self.thread_switcher((commit2,),
(1, 0, tic1, 0, t1_resolve, 1, t2_begin, 0, 1, 1, 0), (1, 1, tic2, 1, t2_resolve, 1, 1, 1),
('tpc_begin', 'tpc_begin', 1, 1, 1, 'AskStoreTransaction', ('tpc_begin', 'tpc_begin', 1, 1, 1, 'AskStoreTransaction',
'tpc_begin', 'AskRebaseTransaction', 'AskRebaseTransaction', 1, 'AskStoreTransaction')) as end:
1, 'AskStoreTransaction')) as end: t1.commit()
f.remove(delayStore)
self.assertRaisesRegexp(NEOStorageError, self.assertRaisesRegexp(NEOStorageError,
'^partition 0 not fully write-locked$', '^partition 0 not fully write-locked$',
t1.commit) commit2.join)
commit2.join() t2.begin()
t1.begin() self.assertEqual(x2.value, 5)
self.assertEqual(x1.value, 14)
self.assertPartitionTable(cluster, 'OU') self.assertPartitionTable(cluster, 'OU')
self.assertEqual(end, {1: [
'AskVoteTransaction', # no such packet sent in reality
# (the fact that it appears here is only an artefact
# between implementation detail and thread_switcher)
'tpc_abort']})
@with_cluster(partitions=2, storage_count=2) @with_cluster(partitions=2, storage_count=2)
def testUnstore(self, cluster): def testUnstore(self, cluster):
...@@ -2803,132 +2566,6 @@ class Test(NEOThreadedTest): ...@@ -2803,132 +2566,6 @@ class Test(NEOThreadedTest):
self.assertEqual(end, {0: [2, 2, 'AskStoreTransaction']}) self.assertEqual(end, {0: [2, 2, 'AskStoreTransaction']})
self.assertFalse(s1.dm.getOrphanList()) self.assertFalse(s1.dm.getOrphanList())
@with_cluster(storage_count=2, partitions=2)
def testDeadlockAvoidanceBeforeInvolvingAnotherNode(self, cluster):
t1, c1 = cluster.getTransaction()
r = c1.root()
for x in 'abc':
r[x] = PCounterWithResolution()
t1.commit()
r['a'].value += 1
r['c'].value += 2
r['b'].value += 3
t2, c2 = cluster.getTransaction()
r = c2.root()
r['c'].value += 4
r['a'].value += 5
r['b'].value += 6
t = self.newPausedThread(t2.commit)
def t1_b(*args, **kw):
yield 1
self.tic()
with self.thread_switcher((t,), (1, 0, 1, 0, t1_b, 0, 0, 0, 1),
('tpc_begin', 'tpc_begin', 1, 3, 3, 1, 'AskRebaseTransaction',
2, 'AnswerRebaseTransaction')) as end:
t1.commit()
t.join()
t2.begin()
self.assertEqual([6, 9, 6], [r[x].value for x in 'abc'])
self.assertEqual([2, 2], map(end.pop(1).count,
['AskRebaseTransaction', 'AnswerRebaseTransaction']))
# Rarely, there's an extra deadlock for t1:
# 0: ['AnswerRebaseTransaction', 'AskRebaseTransaction',
# 'AskRebaseTransaction', 'AnswerRebaseTransaction',
# 'AnswerRebaseTransaction', 2, 3, 1,
# 'AskStoreTransaction', 'VoteTransaction']
self.assertEqual(end.pop(0)[0], 'AnswerRebaseTransaction')
self.assertFalse(end)
@with_cluster()
def testDelayedStoreOrdering(self, cluster):
"""
By processing delayed stores (EventQueue) in the order of their locking
tid, we minimize the number of deadlocks. Here, we trigger a first
deadlock, so that the delayed check for t1 is reordered after that of
t3.
"""
t1, c1 = cluster.getTransaction()
r = c1.root()
for x in 'abcd':
r[x] = PCounter()
t1.commit()
r['a'].value += 1
self.readCurrent(r['d'])
t2, c2 = cluster.getTransaction()
r = c2.root()
r['b'].value += 1
self.readCurrent(r['d'])
t3, c3 = cluster.getTransaction()
r = c3.root()
r['c'].value += 1
self.readCurrent(r['d'])
threads = map(self.newPausedThread, (t2.commit, t3.commit))
with self.thread_switcher(threads, (1, 2, 0, 1, 2, 1, 0, 2, 0, 1, 2),
('tpc_begin', 'tpc_begin', 'tpc_begin', 1, 2, 3, 4, 4, 4,
'AskRebaseTransaction', 'AskStoreTransaction')) as end:
t1.commit()
for t in threads:
t.join()
self.assertEqual(end, {
0: ['AnswerRebaseTransaction', 'AskStoreTransaction'],
2: ['AskStoreTransaction']})
@with_cluster(replicas=1)
def testConflictAfterDeadlockWithSlowReplica1(self, cluster,
slow_rebase=False):
t1, c1 = cluster.getTransaction()
r = c1.root()
for x in 'ab':
r[x] = PCounterWithResolution()
t1.commit()
r['a'].value += 1
r['b'].value += 2
s1 = cluster.storage_list[1]
with cluster.newClient(1) as db, \
(s1.filterConnection(cluster.client) if slow_rebase else
cluster.client.filterConnection(s1)) as f, \
cluster.client.extraCellSortKey(lambda cell:
cell.getUUID() == s1.uuid):
t2, c2 = cluster.getTransaction(db)
r = c2.root()
r['a'].value += 3
self.readCurrent(r['b'])
t = self.newPausedThread(t2.commit)
def tic_t1(*args, **kw):
yield 0
self.tic()
def tic_t2(*args, **kw):
yield 1
self.tic()
def load(orig, *args, **kw):
f.remove(delayStore)
return orig(*args, **kw)
order = [tic_t2, 0, tic_t2, 1, tic_t1, 0, 0, 0, 1, tic_t1, 0]
def t1_resolve(*args, **kw):
yield
f.remove(delay)
if slow_rebase:
order.append(t1_resolve)
delay = f.delayAnswerRebaseObject()
else:
order[-1] = t1_resolve
delay = f.delayAskStoreObject()
with self.thread_switcher((t,), order,
('tpc_begin', 'tpc_begin', 1, 1, 2, 2, 'AskRebaseTransaction',
'AskRebaseTransaction', 'AnswerRebaseTransaction',
'AskStoreTransaction')) as end:
t1.commit()
t.join()
self.assertNotIn(delay, f)
t2.begin()
end[0].sort(key=str)
self.assertEqual(end, {0: [1, 'AnswerRebaseTransaction',
'AskStoreTransaction']})
self.assertEqual([4, 2], [r[x].value for x in 'ab'])
def testConflictAfterDeadlockWithSlowReplica2(self):
self.testConflictAfterDeadlockWithSlowReplica1(True)
@with_cluster(start_cluster=0, master_count=3) @with_cluster(start_cluster=0, master_count=3)
def testElection(self, cluster): def testElection(self, cluster):
m0, m1, m2 = cluster.master_list m0, m1, m2 = cluster.master_list
......