Go back/stay in RECOVERING state when the partition table can't be operational

This fixes several cases where the partition table could become corrupt and the whole cluster being stuck in VERIFYING state. This also reduces the probability the have cells out of date when restarting several storage nodes simultaneously. At last, if a master node becomes primary again, a cluster must not be started automatically if nodes with readable cells are missing, in order to avoid a split of the database. This could happen if this master node was previously forced to start it.

Go back/stay in RECOVERING state when the partition table can't be operational
This fixes several cases where the partition table could become corrupt and the whole cluster being stuck in VERIFYING state. This also reduces the probability the have cells out of date when restarting several storage nodes simultaneously. At last, if a master node becomes primary again, a cluster must not be started automatically if nodes with readable cells are missing, in order to avoid a split of the database. This could happen if this master node was previously forced to start it.
e1f9a7da · Julien Muchembled · 7eb7cf1b · e1f9a7da · e1f9a7da · e1f9a7da
Commit e1f9a7da authored Nov 25, 2015 by Julien Muchembled
15 changed files
--- a/neo/lib/protocol.py
+++ b/neo/lib/protocol.py
@@ -775,6 +775,8 @@ class StartOperation(Packet):
    this message, it must not serve client nodes. PM -> S.
    """
    _fmt = PStruct('start_operation',
+        # XXX: Is this boolean needed ? Maybe this
+        #      can be deduced from cluster state.
        PBoolean('backup'),
    )


--- a/neo/master/app.py
+++ b/neo/master/app.py
@@ -77,7 +77,6 @@ class Application(BaseApplication):
        self.primary = None
        self.primary_master_node = None
        self.cluster_state = None
-        self._startup_allowed = False

        uuid = config.getUUID()
        if uuid:
@@ -254,7 +253,6 @@ class Application(BaseApplication):
            ptid = self.pt.setNextID()
            packet = Packets.NotifyPartitionChanges(ptid, cell_list)
            for node in self.nm.getIdentifiedList():
-                # TODO: notify masters
                if node.isRunning() and not node.isMaster():
                    node.notify(packet)

@@ -278,8 +276,13 @@ class Application(BaseApplication):
            if e.args[0] != ClusterStates.STARTING_BACKUP:
                raise
            self.backup_tid = tid = self.getLastTransaction()
-            self.pt.setBackupTidDict({node.getUUID(): tid
-                for node in self.nm.getStorageList(only_identified=True)})
+            packet = Packets.StartOperation(True)
+            tid_dict = {}
+            for node in self.nm.getStorageList(only_identified=True):
+                tid_dict[node.getUUID()] = tid
+                if node.isRunning():
+                    node.notify(packet)
+            self.pt.setBackupTidDict(tid_dict)

    def playPrimaryRole(self):
        logging.info('play the primary role with %r', self.listening_conn)
@@ -323,30 +326,44 @@ class Application(BaseApplication):
                    in_conflict)
                in_conflict.setUUID(None)

-        # recover the cluster status at startup
+        # Do not restart automatically if ElectionFailure is raised, in order
+        # to avoid a split of the database. For example, with 2 machines with
+        # a master and a storage on each one and replicas=1, the secondary
+        # master becomes primary in case of network failure between the 2
+        # machines but must not start automatically: otherwise, each storage
+        # node would diverge.
+        self._startup_allowed = False
        try:
-            self.runManager(RecoveryManager)
            while True:
-                self.runManager(VerificationManager)
+                self.runManager(RecoveryManager)
+                # Automatic restart if we become non-operational.
+                self._startup_allowed = True
                try:
-                    if self.backup_tid:
-                        if self.backup_app is None:
-                            raise RuntimeError("No upstream cluster to backup"
-                                               " defined in configuration")
-                        self.backup_app.provideService()
-                        # Reset connection with storages (and go through a
-                        # recovery phase) when leaving backup mode in order
-                        # to get correct last oid/tid.
-                        self.runManager(RecoveryManager)
-                        continue
-                    self.provideService()
+                    self.runManager(VerificationManager)
+                    if not self.backup_tid:
+                        self.provideService()
+                        # self.provideService only returns without raising
+                        # when switching to backup mode.
+                    if self.backup_app is None:
+                        raise RuntimeError("No upstream cluster to backup"
+                                           " defined in configuration")
+                    self.backup_app.provideService()
+                    # All connections to storages are aborted when leaving
+                    # backup mode so restart loop completely (recovery).
+                    continue
                except OperationFailure:
                    logging.critical('No longer operational')
+                node_list = []
                for node in self.nm.getIdentifiedList():
                    if node.isStorage() or node.isClient():
-                        node.notify(Packets.StopOperation())
+                        conn = node.getConnection()
+                        conn.notify(Packets.StopOperation())
                        if node.isClient():
-                            node.getConnection().abort()
+                            conn.abort()
+                        elif node.isRunning():
+                            node.setPending()
+                            node_list.append(node)
+                self.broadcastNodesInformation(node_list)
        except StateChangedException, e:
            assert e.args[0] == ClusterStates.STOPPING
            self.shutdown()

--- a/neo/master/handlers/__init__.py
+++ b/neo/master/handlers/__init__.py
@@ -15,6 +15,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

 from neo.lib import logging
+from neo.lib.exception import OperationFailure
 from neo.lib.handler import EventHandler
 from neo.lib.protocol import (uuid_str, NodeTypes, NodeStates, Packets,
    BrokenNodeDisallowedError,
@@ -94,15 +95,13 @@ DISCONNECTED_STATE_DICT = {
 class BaseServiceHandler(MasterHandler):
    """This class deals with events for a service phase."""

-    def nodeLost(self, conn, node):
-        # This method provides a hook point overridable by service classes.
-        # It is triggered when a connection to a node gets lost.
-        pass
-
    def connectionLost(self, conn, new_state):
-        node = self.app.nm.getByUUID(conn.getUUID())
+        app = self.app
+        node = app.nm.getByUUID(conn.getUUID())
        if node is None:
            return # for example, when a storage is removed by an admin
+        assert node.isStorage(), node
+        logging.info('storage node lost')
        if new_state != NodeStates.BROKEN:
            new_state = DISCONNECTED_STATE_DICT.get(node.getType(),
                    NodeStates.DOWN)
@@ -117,10 +116,11 @@ class BaseServiceHandler(MasterHandler):
            # was in pending state, so drop it from the node manager to forget
            # it and do not set in running state when it comes back
            logging.info('drop a pending node from the node manager')
-            self.app.nm.remove(node)
-        self.app.broadcastNodesInformation([node])
-        # clean node related data in specialized handlers
-        self.nodeLost(conn, node)
+            app.nm.remove(node)
+        app.broadcastNodesInformation([node])
+        app.broadcastPartitionChanges(app.pt.outdate(node))
+        if not app.pt.operational():
+            raise OperationFailure("cannot continue operation")

    def notifyReady(self, conn):
        self.app.setStorageReady(conn.getUUID())

--- a/neo/master/handlers/storage.py
+++ b/neo/master/handlers/storage.py
@@ -34,15 +34,16 @@ class StorageServiceHandler(BaseServiceHandler):
        if node.isRunning():
            conn.notify(Packets.StartOperation(bool(app.backup_tid)))

-    def nodeLost(self, conn, node):
-        logging.info('storage node lost')
-        assert not node.isRunning(), node.getState()
+    def connectionLost(self, conn, new_state):
        app = self.app
-        app.broadcastPartitionChanges(app.pt.outdate(node))
-        if not app.pt.operational():
-            raise OperationFailure, 'cannot continue operation'
+        node = app.nm.getByUUID(conn.getUUID())
+        super(StorageServiceHandler, self).connectionLost(conn, new_state)
        app.tm.forget(conn.getUUID())
-        if app.getClusterState() == ClusterStates.BACKINGUP:
+        if (app.getClusterState() == ClusterStates.BACKINGUP
+            # Also check if we're exiting, because backup_app is not usable
+            # in this case. Maybe cluster state should be set to something
+            # else, like STOPPING, during cleanup (__del__/close).
+            and app.listening_conn):
            app.backup_app.nodeLost(node)
        if app.packing is not None:
            self.answerPack(conn, False)

--- a/neo/master/pt.py
+++ b/neo/master/pt.py
@@ -299,15 +299,19 @@ class PartitionTable(neo.lib.pt.PartitionTable):
                    yield offset, cell
                    break

-    def getReadableCellNodeSet(self):
+    def getOperationalNodeSet(self):
        """
        Return a set of all nodes which are part of at least one UP TO DATE
-        partition.
+        partition. An empty list is returned if these nodes aren't enough to
+        become operational.
        """
-        return {cell.getNode()
-            for row in self.partition_list
-            for cell in row
-            if cell.isReadable()}
+        node_set = set()
+        for row in self.partition_list:
+            if not any(cell.isReadable() and cell.getNode().isPending()
+                       for cell in row):
+                return () # not operational
+            node_set.update(cell.getNode() for cell in row if cell.isReadable())
+        return node_set

    def clearReplicating(self):
        for row in self.partition_list:

--- a/neo/master/recovery.py
+++ b/neo/master/recovery.py
@@ -51,7 +51,7 @@ class RecoveryManager(MasterHandler):
        app = self.app
        pt = app.pt
        app.changeClusterState(ClusterStates.RECOVERING)
-        pt.setID(None)
+        pt.clear()

        # collect the last partition table available
        poll = app.em.poll
@@ -60,7 +60,7 @@ class RecoveryManager(MasterHandler):
            if pt.filled():
                # A partition table exists, we are starting an existing
                # cluster.
-                node_list = pt.getReadableCellNodeSet()
+                node_list = pt.getOperationalNodeSet()
                if app._startup_allowed:
                    node_list = [node for node in node_list if node.isPending()]
                elif not all(node.isPending() for node in node_list):
@@ -91,10 +91,16 @@ class RecoveryManager(MasterHandler):
            # reset IDs generators & build new partition with running nodes
            app.tm.setLastOID(ZERO_OID)
            pt.make(node_list)
-            self._broadcastPartitionTable(pt.getID(), pt.getRowList())
-        elif app.backup_tid:
-            pt.setBackupTidDict(self.backup_tid_dict)
-            app.backup_tid = pt.getBackupTid()
+            self._notifyAdmins(Packets.SendPartitionTable(
+                pt.getID(), pt.getRowList()))
+        else:
+            cell_list = pt.outdate()
+            if cell_list:
+                self._notifyAdmins(Packets.NotifyPartitionChanges(
+                    pt.setNextID(), cell_list))
+            if app.backup_tid:
+                pt.setBackupTidDict(self.backup_tid_dict)
+                app.backup_tid = pt.getBackupTid()

        app.setLastTransaction(app.tm.getLastTID())
        logging.debug('cluster starts with loid=%s and this partition table :',
@@ -132,20 +138,15 @@ class RecoveryManager(MasterHandler):
            logging.warn('Got %s while waiting %s', dump(ptid),
                    dump(self.target_ptid))
        else:
-            self._broadcastPartitionTable(ptid, row_list)
+            try:
+                new_nodes = self.app.pt.load(ptid, row_list, self.app.nm)
+            except IndexError:
+                raise ProtocolError('Invalid offset')
+            self._notifyAdmins(Packets.NotifyNodeInformation(new_nodes),
+                               Packets.SendPartitionTable(ptid, row_list))
            self.app.backup_tid = self.backup_tid_dict[conn.getUUID()]

-    def _broadcastPartitionTable(self, ptid, row_list):
-        try:
-            new_nodes = self.app.pt.load(ptid, row_list, self.app.nm)
-        except IndexError:
-            raise ProtocolError('Invalid offset')
-        else:
-            notification = Packets.NotifyNodeInformation(new_nodes)
-            ptid = self.app.pt.getID()
-            row_list = self.app.pt.getRowList()
-            partition_table = Packets.SendPartitionTable(ptid, row_list)
-            # notify the admin nodes
-            for node in self.app.nm.getAdminList(only_identified=True):
-                node.notify(notification)
-                node.notify(partition_table)
+    def _notifyAdmins(self, *packets):
+        for node in self.app.nm.getAdminList(only_identified=True):
+            for packet in packets:
+                node.notify(packet)
--- a/neo/master/verification.py
+++ b/neo/master/verification.py
@@ -21,21 +21,7 @@ from neo.lib.protocol import ClusterStates, Packets, NodeStates
 from .handlers import BaseServiceHandler


-class VerificationFailure(Exception):
-    """
-        Exception raised each time the cluster integrity failed.
-          - An required storage node is missing
-          - A transaction or an object is missing on a node
-    """
-    pass
-
-
 class VerificationManager(BaseServiceHandler):
-    """
-      Manager for verification step of a NEO cluster:
-        - Wait for at least one available storage per partition
-        - Check if all expected content is present
-    """

    def __init__(self, app):
        self._locked_dict = {}
@@ -44,18 +30,13 @@ class VerificationManager(BaseServiceHandler):

    def _askStorageNodesAndWait(self, packet, node_list):
        poll = self.app.em.poll
-        operational = self.app.pt.operational
        uuid_set = self._uuid_set
        uuid_set.clear()
        for node in node_list:
            uuid_set.add(node.getUUID())
            node.ask(packet)
-        while True:
+        while uuid_set:
            poll(1)
-            if not operational():
-                raise VerificationFailure
-            if not uuid_set:
-                break

    def getHandler(self):
        return self
@@ -76,26 +57,13 @@ class VerificationManager(BaseServiceHandler):
        return state, self

    def run(self):
-        self.app.changeClusterState(ClusterStates.VERIFYING)
-        while True:
-            try:
-                self.verifyData()
-            except VerificationFailure:
-                continue
-            break
-        # At this stage, all non-working nodes are out-of-date.
-        self.app.broadcastPartitionChanges(self.app.pt.outdate())
+        app = self.app
+        app.changeClusterState(ClusterStates.VERIFYING)
+        if not app.backup_tid:
+            self.verifyData()

    def verifyData(self):
        app = self.app
-
-        # wait for any missing node
-        logging.debug('waiting for the cluster to be operational')
-        while not app.pt.operational():
-            app.em.poll(1)
-        if app.backup_tid:
-            return
-
        logging.info('start to verify data')
        getIdentifiedList = app.nm.getIdentifiedList

@@ -156,7 +124,6 @@ class VerificationManager(BaseServiceHandler):
    def connectionCompleted(self, conn):
        pass

-    def nodeLost(self, conn, node):
-        if not self.app.pt.operational():
-            raise VerificationFailure, 'cannot continue verification'
-
+    def connectionLost(self, conn, new_state):
+        self._uuid_set.discard(conn.getUUID())
+        super(VerificationManager, self).connectionLost(conn, new_state)
--- a/neo/storage/handlers/master.py
+++ b/neo/storage/handlers/master.py
@@ -16,13 +16,20 @@

 from neo.lib import logging
 from neo.lib.util import dump
-from neo.lib.protocol import Packets, ProtocolError
+from neo.lib.protocol import Packets, ProtocolError, ZERO_TID
 from . import BaseMasterHandler


 class MasterOperationHandler(BaseMasterHandler):
    """ This handler is used for the primary master """

+    def startOperation(self, conn, backup):
+        # XXX: see comment in protocol
+        assert self.app.operational and backup
+        dm = self.app.dm
+        if not dm.getBackupTID():
+            dm.setBackupTID(dm.getLastIDs()[0] or ZERO_TID)
+
    def notifyTransactionFinished(self, conn, *args, **kw):
        self.app.replicator.transactionFinished(*args, **kw)


--- a/neo/storage/handlers/verification.py
+++ b/neo/storage/handlers/verification.py
@@ -49,6 +49,7 @@ class VerificationHandler(BaseMasterHandler):

    def startOperation(self, conn, backup):
        self.app.operational = True
+        # XXX: see comment in protocol
        dm = self.app.dm
        if backup:
            if dm.getBackupTID():

--- a/neo/tests/functional/testCluster.py
+++ b/neo/tests/functional/testCluster.py
@@ -77,7 +77,7 @@ class ClusterTests(NEOFunctionalTest):
        self.neo.expectClusterRunning()
        self.neo.expectOudatedCells(number=0)
        self.neo.killStorage()
-        self.neo.expectClusterVerifying()
+        self.neo.expectClusterRecovering()

    def testClusterBreaksWithTwoNodes(self):
        self.neo = NEOCluster(['test_neo1', 'test_neo2'],
@@ -88,7 +88,7 @@ class ClusterTests(NEOFunctionalTest):
        self.neo.expectClusterRunning()
        self.neo.expectOudatedCells(number=0)
        self.neo.killStorage()
-        self.neo.expectClusterVerifying()
+        self.neo.expectClusterRecovering()

    def testClusterDoesntBreakWithTwoNodesOneReplica(self):
        self.neo = NEOCluster(['test_neo1', 'test_neo2'],
@@ -127,7 +127,7 @@ class ClusterTests(NEOFunctionalTest):
        self.assertEqual(len(self.neo.getClientlist()), 1)
        # drop the storage, the cluster is no more operational...
        self.neo.getStorageProcessList()[0].stop()
-        self.neo.expectClusterVerifying()
+        self.neo.expectClusterRecovering()
        # ...and the client gets disconnected
        self.assertEqual(len(self.neo.getClientlist()), 0)
        # restart storage so that the cluster is operational again

--- a/neo/tests/functional/testStorage.py
+++ b/neo/tests/functional/testStorage.py
@@ -179,7 +179,7 @@ class StorageTests(NEOFunctionalTest):
        # Cluster not operational anymore. Only cells of second storage that
        # were shared with the third one should become outdated.
        self.neo.expectUnavailable(started[1])
-        self.neo.expectClusterVerifying()
+        self.neo.expectClusterRecovering()
        self.neo.expectOudatedCells(3)

    def testVerificationTriggered(self):
@@ -200,7 +200,7 @@ class StorageTests(NEOFunctionalTest):
        # stop it, the cluster must switch to verification
        started[0].stop()
        self.neo.expectUnavailable(started[0])
-        self.neo.expectClusterVerifying()
+        self.neo.expectClusterRecovering()
        # client must have been disconnected
        self.assertEqual(len(self.neo.getClientlist()), 0)
        conn.close()
@@ -245,7 +245,7 @@ class StorageTests(NEOFunctionalTest):
        self.neo.expectUnavailable(started[1])
        self.neo.expectUnavailable(started[2])
        self.neo.expectOudatedCells(number=20)
-        self.neo.expectClusterVerifying()
+        self.neo.expectClusterRecovering()

    def testConflictingStorageRejected(self):
        """ Check that a storage coming after the recovery process with the same
@@ -403,7 +403,7 @@ class StorageTests(NEOFunctionalTest):
        self.neo.expectUnavailable(started[0])
        self.neo.expectUnavailable(started[1])
        self.neo.expectOudatedCells(number=10)
-        self.neo.expectClusterVerifying()
+        self.neo.expectClusterRecovering()
        # XXX: need to sync with storages first
        self.neo.stop()


--- a/neo/tests/master/testStorageHandler.py
+++ b/neo/tests/master/testStorageHandler.py
@@ -177,60 +177,6 @@ class MasterStorageHandlerTests(NeoUnitTestBase):
        self.assertEqual(node2.getState(), state)
        self.assertEqual(lptid, self.app.pt.getID())

-    def test_nodeLostAfterAskLockInformation(self):
-        # 2 storage nodes, one will die
-        node1, conn1 = self._getStorage()
-        node2, conn2 = self._getStorage()
-        # client nodes, to distinguish answers for the sample transactions
-        client1, cconn1 = self._getClient()
-        client2, cconn2 = self._getClient()
-        client3, cconn3 = self._getClient()
-        oid_list = [self.getOID(), ]
-
-        # Some shortcuts to simplify test code
-        self.app.pt = Mock({'operational': True})
-
-        # Register some transactions
-        tm = self.app.tm
-        # Transaction 1: 2 storage nodes involved, one will die and the other
-        # already answered node lock
-        msg_id_1 = 1
-        ttid1 = tm.begin(client1)
-        tid1 = tm.prepare(ttid1, 1, oid_list,
-            [node1.getUUID(), node2.getUUID()], msg_id_1)
-        tm.lock(ttid1, node2.getUUID())
-        # storage 1 request a notification at commit
-        tm. registerForNotification(node1.getUUID())
-        self.checkNoPacketSent(cconn1)
-        # Storage 1 dies
-        node1.setTemporarilyDown()
-        self.service.nodeLost(conn1, node1)
-        # T1: last locking node lost, client receives AnswerTransactionFinished
-        self.checkAnswerTransactionFinished(cconn1)
-        self.checkNotifyTransactionFinished(conn1)
-        self.checkNotifyUnlockInformation(conn2)
-        # ...and notifications are sent to other clients
-        self.checkInvalidateObjects(cconn2)
-        self.checkInvalidateObjects(cconn3)
-
-        # Transaction 2: 2 storage nodes involved, one will die
-        msg_id_2 = 2
-        ttid2 = tm.begin(node1)
-        tid2 = tm.prepare(ttid2, 1, oid_list,
-            [node1.getUUID(), node2.getUUID()], msg_id_2)
-        # T2: pending locking answer, client keeps waiting
-        self.checkNoPacketSent(cconn2, check_notify=False)
-        tm.remove(node1.getUUID(), ttid2)
-
-        # Transaction 3: 1 storage node involved, which won't die
-        msg_id_3 = 3
-        ttid3 = tm.begin(node1)
-        tid3 = tm.prepare(ttid3, 1, oid_list,
-            [node2.getUUID(), ], msg_id_3)
-        # T3: action not significant to this transacion, so no response
-        self.checkNoPacketSent(cconn3, check_notify=False)
-        tm.remove(node1.getUUID(), ttid3)
-
    def test_answerPack(self):
        # Note: incomming status has no meaning here, so it's left to False.
        node1, conn1 = self._getStorage()

--- a/neo/tests/threaded/__init__.py
+++ b/neo/tests/threaded/__init__.py
@@ -33,7 +33,7 @@ from neo.lib import logging
 from neo.lib.connection import BaseConnection, Connection
 from neo.lib.connector import SocketConnector, ConnectorException
 from neo.lib.locking import SimpleQueue
-from neo.lib.protocol import CellStates, ClusterStates, NodeStates, NodeTypes
+from neo.lib.protocol import ClusterStates, NodeStates, NodeTypes
 from neo.lib.util import cached_property, parseMasterList, p64
 from .. import NeoTestBase, Patch, getTempDirectory, setupMySQLdb, \
    ADDRESS_TYPE, IP_VERSION_FORMAT_DICT, DB_PREFIX, DB_USER
@@ -478,6 +478,8 @@ class ConnectionFilter(object):
                        queue.appendleft(packet)
                        break
                else:
+                    if conn.isClosed():
+                        return
                    cls._addPacket(conn, packet)
                    continue
                break
@@ -731,9 +733,12 @@ class NEOCluster(object):
                return node[3]

    def getOutdatedCells(self):
-        return [cell for row in self.neoctl.getPartitionRowList()[1]
-                     for cell in row[1]
-                     if cell[1] == CellStates.OUT_OF_DATE]
+        # Ask the admin instead of the primary master to check that it is
+        # notified of every change.
+        return [(i, cell.getUUID())
+            for i, row in enumerate(self.admin.pt.partition_list)
+            for cell in row
+            if not cell.isReadable()]

    def getZODBStorage(self, **kw):
        kw['_app'] = kw.pop('client', self.client)

--- a/neo/tests/threaded/test.py
+++ b/neo/tests/threaded/test.py
@@ -394,6 +394,62 @@ class Test(NEOThreadedTest):
        finally:
            cluster.stop()

+    def testRestartStoragesWithReplicas(self):
+        """
+        Check that the master must discard its partition table when the
+        cluster is not operational anymore. Which means that it must go back
+        to RECOVERING state and remain there as long as the partition table
+        can't be operational.
+        This also checks that if the master remains the primary one after going
+        back to recovery, it automatically starts the cluster if possible
+        (i.e. without manual intervention).
+        """
+        outdated = []
+        def doOperation(orig):
+            outdated.append(cluster.getOutdatedCells())
+            orig()
+        def stop():
+            with cluster.master.filterConnection(s0) as m2s0:
+                m2s0.add(lambda conn, packet:
+                    isinstance(packet, Packets.NotifyPartitionChanges))
+                s1.stop()
+                cluster.join((s1,))
+                self.assertEqual(getClusterState(), ClusterStates.RUNNING)
+                self.assertEqual(cluster.getOutdatedCells(),
+                                 [(0, s1.uuid), (1, s1.uuid)])
+                s0.stop()
+                cluster.join((s0,))
+            self.assertNotEqual(getClusterState(), ClusterStates.RUNNING)
+            s0.resetNode()
+            s1.resetNode()
+        cluster = NEOCluster(storage_count=2, partitions=2, replicas=1)
+        try:
+            cluster.start()
+            s0, s1 = cluster.storage_list
+            getClusterState = cluster.neoctl.getClusterState
+            if 1:
+                # Scenario 1: When all storage nodes are restarting,
+                # we want a chance to not restart with outdated cells.
+                stop()
+                with Patch(s1, doOperation=doOperation):
+                    s0.start()
+                    s1.start()
+                    self.tic()
+                self.assertEqual(getClusterState(), ClusterStates.RUNNING)
+                self.assertEqual(outdated, [[]])
+            if 1:
+                # Scenario 2: When only the first storage node to be stopped
+                # is started, the cluster must be able to restart.
+                stop()
+                s1.start()
+                self.tic()
+                # The master doesn't wait for s0 to come back.
+                self.assertEqual(getClusterState(), ClusterStates.RUNNING)
+                self.assertEqual(cluster.getOutdatedCells(),
+                                [(0, s0.uuid), (1, s0.uuid)])
+        finally:
+            cluster.stop()
+
    def testVerificationCommitUnfinishedTransactions(self):
        """ Verification step should commit locked transactions """
        def delayUnlockInformation(conn, packet):
@@ -588,12 +644,17 @@ class Test(NEOThreadedTest):
            cluster.start()
            # prevent storage to reconnect, in order to easily test
            # that cluster becomes non-operational
-            storage.connectToPrimary = sys.exit
-            # send an unexpected to master so it aborts connection to storage
-            storage.master_conn.answer(Packets.Pong())
+            with Patch(storage, connectToPrimary=sys.exit):
+                # send an unexpected to master so it aborts connection to storage
+                storage.master_conn.answer(Packets.Pong())
+                self.tic()
+            self.assertEqual(cluster.neoctl.getClusterState(),
+                             ClusterStates.RECOVERING)
+            storage.resetNode()
+            storage.start()
            self.tic()
            self.assertEqual(cluster.neoctl.getClusterState(),
-                             ClusterStates.VERIFYING)
+                             ClusterStates.RUNNING)
        finally:
            cluster.stop()


--- a/neo/tests/threaded/testReplication.py
+++ b/neo/tests/threaded/testReplication.py
@@ -424,7 +424,7 @@ class ReplicationTests(NEOThreadedTest):
            check(ClusterStates.RUNNING, 1)
            cluster.neoctl.checkReplicas(check_dict, ZERO_TID, None)
            self.tic()
-            check(ClusterStates.VERIFYING, 4)
+            check(ClusterStates.RECOVERING, 4)
        finally:
            checker.CHECK_COUNT = CHECK_COUNT
            cluster.stop()