Commit f49bf829 authored by Julien Muchembled

master: check backup master behaves properly when upstream master is unreachable

This fixes the following assertion failure:

PACKET #0x021d Ping                           > M1 (10.0.114.64:14001)
INFO   timeout for #0x0000021d with <ClientConnection(uuid=M1, address=10.0.114.64:14001, closed=0, handler=BackupHandler) at 2955590>
DEBUG  connection closed for <ClientConnection(uuid=M1, address=10.0.114.64:14001, closed=1, handler=BackupHandler) at 2955590>
ERROR  upstream master is down: connection lost
ERROR  Pre-mortem data:
ERROR  Traceback (most recent call last):
ERROR    File "neo/master/app.py", line 134, in run
ERROR      self._run()
ERROR    File "neo/master/app.py", line 154, in _run
ERROR      self.playPrimaryRole()
ERROR    File "neo/master/app.py", line 344, in playPrimaryRole
ERROR      self.backup_app.provideService()
ERROR    File "neo/master/backup_app.py", line 129, in provideService
ERROR      conn.close()
ERROR    File "neo/lib/connection.py", line 541, in close
ERROR      assert not self.isPending()
ERROR  AssertionError
parent b81ae60a
...@@ -550,12 +550,14 @@ class Connection(BaseConnection): ...@@ -550,12 +550,14 @@ class Connection(BaseConnection):
self._on_close = None self._on_close = None
del self.write_buf[:] del self.write_buf[:]
self.read_buf.clear() self.read_buf.clear()
if self.connecting: try:
handler.connectionFailed(self) if self.connecting:
self.connecting = False handler.connectionFailed(self)
else: self.connecting = False
handler.connectionClosed(self) else:
self._handlers.clear() handler.connectionClosed(self)
finally:
self._handlers.clear()
def _closure(self): def _closure(self):
assert self.connector is not None, self.whoSetConnector() assert self.connector is not None, self.whoSetConnector()
......
...@@ -220,6 +220,41 @@ class ReplicationTests(NEOThreadedTest): ...@@ -220,6 +220,41 @@ class ReplicationTests(NEOThreadedTest):
finally: finally:
upstream.stop() upstream.stop()
def testBackupUpstreamMasterDead(self):
    """Check proper behaviour when upstream master is unreachable

    More generally, this checks that when a handler raises when a connection
    is closed voluntarily, the connection is in a consistent state and can
    be, for example, closed again after the exception is caught, without
    assertion failure.

    Scenario: start an upstream cluster and a backup cluster attached to
    it, make the backup master's connection to the upstream master send a
    ping and then time out on it, and verify that the backup master ends
    up with a different connection object to the upstream master.
    """
    upstream = NEOCluster()
    try:
        upstream.start()
        # NOTE(review): the value bound here is not used anywhere else in
        # this test -- confirm whether the call is needed for its side
        # effects or whether it can be dropped.
        importZODB = upstream.importZODB()
        backup = NEOCluster(upstream=upstream)
        try:
            backup.start()
            backup.neoctl.setClusterState(ClusterStates.STARTING_BACKUP)
            backup.tic()
            # The single-element unpacking also checks that there is exactly
            # one connection from the backup master to the upstream master.
            conn, = backup.master.getConnectionList(upstream.master)
            # trigger ping
            conn.updateTimeout(1)
            # Nothing must be awaiting an answer before the timeout check...
            self.assertFalse(conn.isPending())
            conn.checkTimeout(time.time())
            # ...and something must be pending right after it.
            self.assertTrue(conn.isPending())
            # force ping to have expired
            conn.updateTimeout(1)
            # connection will be closed before upstream master has time
            # to answer
            backup.tic(force=1)
            # Again exactly one connection is expected, and it must not be
            # the object that was just timed out.
            new_conn, = backup.master.getConnectionList(upstream.master)
            self.assertFalse(new_conn is conn)
        finally:
            # Always stop the backup cluster, even if an assertion failed.
            backup.stop()
    finally:
        # Stop the upstream cluster last, whatever happened above.
        upstream.stop()
def testReplicationAbortedBySource(self): def testReplicationAbortedBySource(self):
""" """
Check that a feeding node aborts replication when its partition is Check that a feeding node aborts replication when its partition is
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment