Commit afd801c8 authored by Julien Muchembled

qa: fix random RuntimeError when starting cluster in testClusterStartup

parent bdf6389d
...@@ -64,6 +64,9 @@ class AdministrationHandler(MasterHandler): ...@@ -64,6 +64,9 @@ class AdministrationHandler(MasterHandler):
for node in storage_list: for node in storage_list:
assert node.isPending(), node assert node.isPending(), node
if node.getConnection().isPending(): if node.getConnection().isPending():
# XXX: It's wrong to use ProtocolError here. We must reply
# less aggressively because the admin has no way to
# know that there's still pending activity.
raise ProtocolError('Cannot exit recovery now: node %r is ' raise ProtocolError('Cannot exit recovery now: node %r is '
'entering cluster' % (node, )) 'entering cluster' % (node, ))
app._startup_allowed = True app._startup_allowed = True
......
...@@ -416,6 +416,18 @@ class NEOCluster(object): ...@@ -416,6 +416,18 @@ class NEOCluster(object):
if not pdb.wait(test, MAX_START_TIME): if not pdb.wait(test, MAX_START_TIME):
raise AssertionError('Timeout when starting cluster') raise AssertionError('Timeout when starting cluster')
def startCluster(self):
    """Ask the cluster to start, tolerating transient refusals.

    The actual request is ``self.neoctl.startCluster()``. It is wrapped in
    a condition callback handed to ``self.expectCondition`` instead of
    being called directly, so that a ``NotReadyException`` or
    ``RuntimeError`` raised by the request is reported as a failed attempt
    rather than propagating to the caller.

    NOTE(review): this relies on ``expectCondition`` re-invoking the
    callback until it reports success or a timeout expires -- confirm
    against its definition.
    """
    # Even if the storage nodes are in the expected state, there may still
    # be activity between them and the master, preventing the cluster from
    # starting.
    def start(last_try):
        # `last_try` is part of the callback signature; it is not needed
        # here because every attempt is handled the same way.
        try:
            self.neoctl.startCluster()
        # The exception must be bound to a name: it is returned below so
        # the caller of the callback can see why the attempt failed.
        # (`as` is valid on Python 2.6+ and Python 3.)
        except (NotReadyException, RuntimeError) as e:
            return False, e
        return True, None
    self.expectCondition(start)
def stop(self, clients=True): def stop(self, clients=True):
# Suspend all processes to kill before actually killing them, so that # Suspend all processes to kill before actually killing them, so that
# nodes don't log errors because they get disconnected from other nodes: # nodes don't log errors because they get disconnected from other nodes:
......
...@@ -31,7 +31,6 @@ class ClusterTests(NEOFunctionalTest): ...@@ -31,7 +31,6 @@ class ClusterTests(NEOFunctionalTest):
def testClusterStartup(self): def testClusterStartup(self):
neo = self.neo = NEOCluster(['test_neo1', 'test_neo2'], replicas=1, neo = self.neo = NEOCluster(['test_neo1', 'test_neo2'], replicas=1,
temp_dir=self.getTempDirectory()) temp_dir=self.getTempDirectory())
neoctl = neo.neoctl
neo.run() neo.run()
# Running a new cluster doesn't exit Recovery state. # Running a new cluster doesn't exit Recovery state.
s1, s2 = neo.getStorageProcessList() s1, s2 = neo.getStorageProcessList()
...@@ -40,7 +39,7 @@ class ClusterTests(NEOFunctionalTest): ...@@ -40,7 +39,7 @@ class ClusterTests(NEOFunctionalTest):
neo.expectClusterRecovering() neo.expectClusterRecovering()
# When allowing cluster to exit Recovery, it reaches Running state and # When allowing cluster to exit Recovery, it reaches Running state and
# all present storage nodes reach running state. # all present storage nodes reach running state.
neoctl.startCluster() neo.startCluster()
neo.expectRunning(s1) neo.expectRunning(s1)
neo.expectRunning(s2) neo.expectRunning(s2)
neo.expectClusterRunning() neo.expectClusterRunning()
...@@ -64,7 +63,7 @@ class ClusterTests(NEOFunctionalTest): ...@@ -64,7 +63,7 @@ class ClusterTests(NEOFunctionalTest):
neo.expectPending(s1) neo.expectPending(s1)
neo.expectUnknown(s2) neo.expectUnknown(s2)
neo.expectClusterRecovering() neo.expectClusterRecovering()
neoctl.startCluster() neo.startCluster()
neo.expectRunning(s1) neo.expectRunning(s1)
neo.expectUnknown(s2) neo.expectUnknown(s2)
neo.expectClusterRunning() neo.expectClusterRunning()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment