master: fix crash of backup master when disconnected from upstream while serving clients

This fixes:

  Traceback (most recent call last):
    File "neo/master/app.py", line 172, in run
      self._run()
    File "neo/master/app.py", line 182, in _run
      self.playPrimaryRole()
    File "neo/master/app.py", line 314, in playPrimaryRole
      self.backup_app.provideService())
    File "neo/master/backup_app.py", line 101, in provideService
      app.changeClusterState(ClusterStates.STARTING_BACKUP)
    File "neo/master/app.py", line 474, in changeClusterState
      ) or not node.isClient(), (state, node)
  AssertionError: (<EnumItem STARTING_BACKUP (4)>, <ClientNode(uuid=C1, state=RUNNING, connection=<ServerConnection(nid=C1, address=127.0.0.1:52430, handler=ClientReadOnlyServiceHandler, fd=59, on_close=onConnectionClosed, server) at 7f38f5628390>) at 7f38f5628ad0>)

master: fix crash of backup master when disconnected from upstream while serving clients
This fixes:

  Traceback (most recent call last):
    File "neo/master/app.py", line 172, in run
      self._run()
    File "neo/master/app.py", line 182, in _run
      self.playPrimaryRole()
    File "neo/master/app.py", line 314, in playPrimaryRole
      self.backup_app.provideService())
    File "neo/master/backup_app.py", line 101, in provideService
      app.changeClusterState(ClusterStates.STARTING_BACKUP)
    File "neo/master/app.py", line 474, in changeClusterState
      ) or not node.isClient(), (state, node)
  AssertionError: (<EnumItem STARTING_BACKUP (4)>, <ClientNode(uuid=C1, state=RUNNING, connection=<ServerConnection(nid=C1, address=127.0.0.1:52430, handler=ClientReadOnlyServiceHandler, fd=59, on_close=onConnectionClosed, server) at 7f38f5628390>) at 7f38f5628ad0>)
7e8ca9ec · Julien Muchembled · e2b11d54 · 7e8ca9ec · 7e8ca9ec
Commit 7e8ca9ec authored Jan 10, 2020 by Julien Muchembled
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 1 deletion

neo/master/backup_app.py neo/master/backup_app.py +2 -0

neo/tests/threaded/testReplication.py neo/tests/threaded/testReplication.py +10 -1

No files found.
--- a/neo/master/backup_app.py
+++ b/neo/master/backup_app.py
@@ -136,6 +136,8 @@ class BackupApplication(object):
                        del self.pt
                    except AttributeError:
                        pass
+                    for node in app.nm.getClientList(True):
+                        node.getConnection().close()
            except StateChangedException, e:
                if e.args[0] != ClusterStates.STOPPING_BACKUP:
                    raise

--- a/neo/tests/threaded/testReplication.py
+++ b/neo/tests/threaded/testReplication.py
@@ -1128,11 +1128,12 @@ class ReplicationTests(NEOThreadedTest):
        # S -> Sb link stops working during [cutoff, recover) test iterations
        cutoff  = 4
        recover = 7
+        loop = 10
        def delayReplication(conn, packet):
            return isinstance(packet, Packets.AnswerFetchTransactions)

        with ConnectionFilter() as f:
-            for i in xrange(10):
+            for i in xrange(loop):
                if i == cutoff:
                    f.add(delayReplication)
                if i == recover:
@@ -1202,6 +1203,14 @@ class ReplicationTests(NEOThreadedTest):
                # thus not ReadOnlyError
                self.assertRaises(NEOStorageError, Zb.tpc_vote, txn)

+                if i == loop // 2:
+                    # Check that we survive a disconnection from upstream
+                    # when we are serving clients. The client must be
+                    # disconnected before leaving BACKINGUP state.
+                    conn, = U.master.getConnectionList(B.master)
+                    conn.close()
+                    self.tic()
+
                # close storage because client app is otherwise shared in
                # threaded tests and we need to refresh last_tid on next run
                # (XXX see above about invalidations not working)