Commit a81824a0 authored by Grégory Wisniewski's avatar Grégory Wisniewski

First step of the manual cluster startup implementation:

The recovery stage is run only once, when switching to the primary state.
Each time the cluster loses its operational status (a cell has no up-to-date
node), the verification stage restarts.


git-svn-id: https://svn.erp5.org/repos/neo/branches/prototype3@794 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent db31b672
...@@ -528,23 +528,20 @@ class Application(object): ...@@ -528,23 +528,20 @@ class Application(object):
"""Verify the data in storage nodes and clean them up, if necessary.""" """Verify the data in storage nodes and clean them up, if necessary."""
logging.info('start to verify data') logging.info('start to verify data')
em = self.em em, nm = self.em, self.nm
nm = self.nm self.changeClusterState(protocol.VERIFYING)
# Wait ask/request primary master exchange with the last storage node # wait for any missing node
# because it have to be in the verification state while not self.pt.operational():
t = time()
while time() < t + 1:
em.poll(1) em.poll(1)
self.changeClusterState(protocol.VERIFYING)
# FIXME this part has a potential problem that the write buffers can # FIXME this part has a potential problem that the write buffers can
# be very huge. Thus it would be better to flush the buffers from time # be very huge. Thus it would be better to flush the buffers from time
# to time _without_ reading packets. # to time _without_ reading packets.
# Send the current partition table to storage and admin nodes, so that # Send the current partition table to storage and admin nodes, so that
# all nodes share the same view. # all nodes share the same view.
# FIXME: the admin must ask itself the partition table
for conn in em.getConnectionList(): for conn in em.getConnectionList():
uuid = conn.getUUID() uuid = conn.getUUID()
if uuid is not None: if uuid is not None:
...@@ -690,15 +687,14 @@ class Application(object): ...@@ -690,15 +687,14 @@ class Application(object):
if node.getState() == RUNNING_STATE: if node.getState() == RUNNING_STATE:
node.setState(TEMPORARILY_DOWN_STATE) node.setState(TEMPORARILY_DOWN_STATE)
# recover the cluster status at startup
self.recoverStatus()
while 1: while 1:
recovering = True try:
while recovering: self.verifyData()
self.recoverStatus() except VerificationFailure:
recovering = False continue
try:
self.verifyData()
except VerificationFailure:
recovering = True
self.provideService() self.provideService()
def playSecondaryRole(self): def playSecondaryRole(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment