master: check backup master behaves properly when upstream master is unreachable

This fixes the following assertion failure:

  PACKET #0x021d Ping > M1 (10.0.114.64:14001)
  INFO timeout for #0x0000021d with <ClientConnection(uuid=M1, address=10.0.114.64:14001, closed=0, handler=BackupHandler) at 2955590>
  DEBUG connection closed for <ClientConnection(uuid=M1, address=10.0.114.64:14001, closed=1, handler=BackupHandler) at 2955590>
  ERROR upstream master is down: connection lost
  ERROR Pre-mortem data:
  ERROR Traceback (most recent call last):
  ERROR   File "neo/master/app.py", line 134, in run
  ERROR     self._run()
  ERROR   File "neo/master/app.py", line 154, in _run
  ERROR     self.playPrimaryRole()
  ERROR   File "neo/master/app.py", line 344, in playPrimaryRole
  ERROR     self.backup_app.provideService()
  ERROR   File "neo/master/backup_app.py", line 129, in provideService
  ERROR     conn.close()
  ERROR   File "neo/lib/connection.py", line 541, in close
  ERROR     assert not self.isPending()
  ERROR AssertionError

master: check backup master behaves properly when upstream master is unreachable
This fixes the following assertion failure:

  PACKET #0x021d Ping > M1 (10.0.114.64:14001)
  INFO timeout for #0x0000021d with <ClientConnection(uuid=M1, address=10.0.114.64:14001, closed=0, handler=BackupHandler) at 2955590>
  DEBUG connection closed for <ClientConnection(uuid=M1, address=10.0.114.64:14001, closed=1, handler=BackupHandler) at 2955590>
  ERROR upstream master is down: connection lost
  ERROR Pre-mortem data:
  ERROR Traceback (most recent call last):
  ERROR   File "neo/master/app.py", line 134, in run
  ERROR     self._run()
  ERROR   File "neo/master/app.py", line 154, in _run
  ERROR     self.playPrimaryRole()
  ERROR   File "neo/master/app.py", line 344, in playPrimaryRole
  ERROR     self.backup_app.provideService()
  ERROR   File "neo/master/backup_app.py", line 129, in provideService
  ERROR     conn.close()
  ERROR   File "neo/lib/connection.py", line 541, in close
  ERROR     assert not self.isPending()
  ERROR AssertionError
f49bf829 · Julien Muchembled · b81ae60a · f49bf829 · f49bf829
Commit f49bf829 authored Aug 12, 2012 by Julien Muchembled
Show whitespace changes
Inline Side-by-side

Showing with 43 additions and 6 deletions

neo/lib/connection.py neo/lib/connection.py +8 -6

neo/tests/threaded/testReplication.py neo/tests/threaded/testReplication.py +35 -0

No files found.
--- a/neo/lib/connection.py
+++ b/neo/lib/connection.py
@@ -550,11 +550,13 @@ class Connection(BaseConnection):
            self._on_close = None
        del self.write_buf[:]
        self.read_buf.clear()
+        try:
            if self.connecting:
                handler.connectionFailed(self)
                self.connecting = False
            else:
                handler.connectionClosed(self)
+        finally:
            self._handlers.clear()

    def _closure(self):

--- a/neo/tests/threaded/testReplication.py
+++ b/neo/tests/threaded/testReplication.py
@@ -220,6 +220,41 @@ class ReplicationTests(NEOThreadedTest):
        finally:
            upstream.stop()

+    def testBackupUpstreamMasterDead(self):
+        """Check proper behaviour when upstream master is unreachable
+
+        More generally, this checks that if a handler raises while a connection
+        is being closed voluntarily, the connection is left in a consistent
+        state and can be, for example, closed again after the exception is
+        caught, without assertion failure.
+        """
+        upstream = NEOCluster()
+        try:
+            upstream.start()
+            importZODB = upstream.importZODB()
+            backup = NEOCluster(upstream=upstream)
+            try:
+                backup.start()
+                backup.neoctl.setClusterState(ClusterStates.STARTING_BACKUP)
+                backup.tic()
+                conn, = backup.master.getConnectionList(upstream.master)
+                # trigger ping: checkTimeout() must leave a request pending
+                conn.updateTimeout(1)
+                self.assertFalse(conn.isPending())
+                conn.checkTimeout(time.time())
+                self.assertTrue(conn.isPending())
+                # force ping to have expired
+                conn.updateTimeout(1)
+                # connection will be closed before upstream master has time
+                # to answer
+                backup.tic(force=1)
+                new_conn, = backup.master.getConnectionList(upstream.master)
+                self.assertFalse(new_conn is conn)
+            finally:
+                backup.stop()
+        finally:
+            upstream.stop()
+
    def testReplicationAbortedBySource(self):
        """
        Check that a feeding node aborts replication when its partition is