Commit f49bf829 authored by Julien Muchembled

master: check backup master behaves properly when upstream master is unreachable

This fixes the following assertion failure:

PACKET #0x021d Ping                           > M1 (10.0.114.64:14001)
INFO   timeout for #0x0000021d with <ClientConnection(uuid=M1, address=10.0.114.64:14001, closed=0, handler=BackupHandler) at 2955590>
DEBUG  connection closed for <ClientConnection(uuid=M1, address=10.0.114.64:14001, closed=1, handler=BackupHandler) at 2955590>
ERROR  upstream master is down: connection lost
ERROR  Pre-mortem data:
ERROR  Traceback (most recent call last):
ERROR    File "neo/master/app.py", line 134, in run
ERROR      self._run()
ERROR    File "neo/master/app.py", line 154, in _run
ERROR      self.playPrimaryRole()
ERROR    File "neo/master/app.py", line 344, in playPrimaryRole
ERROR      self.backup_app.provideService()
ERROR    File "neo/master/backup_app.py", line 129, in provideService
ERROR      conn.close()
ERROR    File "neo/lib/connection.py", line 541, in close
ERROR      assert not self.isPending()
ERROR  AssertionError
parent b81ae60a
......@@ -550,11 +550,13 @@ class Connection(BaseConnection):
self._on_close = None
del self.write_buf[:]
self.read_buf.clear()
try:
if self.connecting:
handler.connectionFailed(self)
self.connecting = False
else:
handler.connectionClosed(self)
finally:
self._handlers.clear()
def _closure(self):
......
......@@ -220,6 +220,41 @@ class ReplicationTests(NEOThreadedTest):
finally:
upstream.stop()
def testBackupUpstreamMasterDead(self):
    """Check proper behaviour when upstream master is unreachable

    More generally, this checks that when a handler raises while a
    connection is being closed voluntarily, the connection is left in a
    consistent state and can be, for example, closed again after the
    exception is caught, without assertion failure.
    """
    upstream = NEOCluster()
    try:
        upstream.start()
        # NOTE(review): the returned value is never used in this test --
        # confirm whether this call is only kept for its side effects.
        importZODB = upstream.importZODB()
        backup = NEOCluster(upstream=upstream)
        try:
            backup.start()
            backup.neoctl.setClusterState(ClusterStates.STARTING_BACKUP)
            backup.tic()
            # The single-element unpacking fails unless exactly one
            # connection is returned (presumably the backup master's
            # connection to the upstream master).
            conn, = backup.master.getConnectionList(upstream.master)
            # trigger ping
            conn.updateTimeout(1)
            self.assertFalse(conn.isPending())
            conn.checkTimeout(time.time())
            # The connection must now be waiting for something (the answer
            # to the ping, according to the comment above).
            self.assertTrue(conn.isPending())
            # force ping to have expired
            conn.updateTimeout(1)
            # connection will be closed before upstream master has time
            # to answer
            backup.tic(force=1)
            # Exactly one connection must be listed again, and it must be
            # a different object from the one that was pending.
            new_conn, = backup.master.getConnectionList(upstream.master)
            self.assertFalse(new_conn is conn)
        finally:
            # Always stop the backup cluster, even if an assertion failed.
            backup.stop()
    finally:
        # The upstream cluster is stopped last, after the backup cluster.
        upstream.stop()
def testReplicationAbortedBySource(self):
"""
Check that a feeding node aborts replication when its partition is
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment