Commit f39babe5 authored by Julien Muchembled

Remove UNKNOWN node state

parent 23b6a66a
......@@ -32,7 +32,7 @@ class Node(object):
id_timestamp = None
def __init__(self, manager, address=None, uuid=None,
state=NodeStates.UNKNOWN):
state=NodeStates.TEMPORARILY_DOWN):
self._state = state
self._address = address
self._uuid = uuid
......
......@@ -123,7 +123,6 @@ def NodeStates():
TEMPORARILY_DOWN
DOWN
PENDING
UNKNOWN
@Enum
def CellStates():
......@@ -150,7 +149,6 @@ node_state_prefix_dict = {
NodeStates.TEMPORARILY_DOWN: 'T',
NodeStates.DOWN: 'D',
NodeStates.PENDING: 'P',
NodeStates.UNKNOWN: 'U',
}
# used for logging
......
......@@ -18,9 +18,7 @@ from ..app import monotonic_time
from neo.lib import logging
from neo.lib.exception import StoppedOperation
from neo.lib.handler import EventHandler
from neo.lib.protocol import (uuid_str, NodeTypes, NodeStates, Packets,
ProtocolError,
)
from neo.lib.protocol import Packets
class MasterHandler(EventHandler):
"""This class implements a generic part of the event handlers."""
......@@ -66,10 +64,6 @@ class MasterHandler(EventHandler):
conn.answer(Packets.AnswerPartitionTable(pt.getID(), pt.getRowList()))
DISCONNECTED_STATE_DICT = {
NodeTypes.STORAGE: NodeStates.TEMPORARILY_DOWN,
}
class BaseServiceHandler(MasterHandler):
"""This class deals with events for a service phase."""
......@@ -84,17 +78,17 @@ class BaseServiceHandler(MasterHandler):
return # for example, when a storage is removed by an admin
assert node.isStorage(), node
logging.info('storage node lost')
new_state = DISCONNECTED_STATE_DICT.get(node.getType(), NodeStates.DOWN)
assert node.getState() not in (NodeStates.TEMPORARILY_DOWN,
NodeStates.DOWN), (uuid_str(self.app.uuid),
node.whoSetState(), new_state)
was_pending = node.isPending()
node.setState(new_state)
if was_pending:
if node.isPending():
# was in pending state, so drop it from the node manager to forget
# it and do not set in running state when it comes back
logging.info('drop a pending node from the node manager')
app.nm.remove(node)
node.setDown()
elif node.isTemporarilyDown():
# Already put in TEMPORARILY_DOWN state
# by AdministrationHandler.setNodeState
return
else:
node.setTemporarilyDown()
app.broadcastNodesInformation([node])
if app.truncate_tid:
raise StoppedOperation
......
......@@ -34,8 +34,8 @@ CLUSTER_STATE_WORKFLOW = {
ClusterStates.STARTING_BACKUP),
}
NODE_STATE_WORKFLOW = {
NodeTypes.MASTER: (NodeStates.UNKNOWN,),
NodeTypes.STORAGE: (NodeStates.UNKNOWN, NodeStates.DOWN),
NodeTypes.MASTER: (NodeStates.TEMPORARILY_DOWN,),
NodeTypes.STORAGE: (NodeStates.TEMPORARILY_DOWN, NodeStates.DOWN),
}
class AdministrationHandler(MasterHandler):
......@@ -95,7 +95,7 @@ class AdministrationHandler(MasterHandler):
message = ('state changed' if state_changed else
'node already in %s state' % state)
if node.isStorage():
keep = state == NodeStates.UNKNOWN
keep = state == NodeStates.TEMPORARILY_DOWN
try:
cell_list = app.pt.dropNodeList([node], keep)
except PartitionTableException, e:
......
......@@ -91,5 +91,5 @@ class PrimaryHandler(ElectionHandler):
conn, timestamp, node_list)
for node_type, _, uuid, state, _ in node_list:
assert node_type == NodeTypes.MASTER, node_type
if uuid == self.app.uuid and state == NodeStates.UNKNOWN:
if uuid == self.app.uuid and state == NodeStates.TEMPORARILY_DOWN:
sys.exit()
......@@ -157,7 +157,7 @@ class NeoCTL(BaseApplication):
return self.setClusterState(ClusterStates.VERIFYING)
def killNode(self, node):
return self._setNodeState(node, NodeStates.UNKNOWN)
return self._setNodeState(node, NodeStates.TEMPORARILY_DOWN)
def dropNode(self, node):
return self._setNodeState(node, NodeStates.DOWN)
......
......@@ -56,8 +56,7 @@ class BaseMasterHandler(BaseHandler):
if uuid == self.app.uuid:
# This is me, do what the master tell me
logging.info("I was told I'm %s", state)
if state in (NodeStates.DOWN, NodeStates.TEMPORARILY_DOWN,
NodeStates.UNKNOWN):
if state in (NodeStates.DOWN, NodeStates.TEMPORARILY_DOWN):
erase = state == NodeStates.DOWN
self.app.shutdown(erase=erase)
elif node_type == NodeTypes.CLIENT and state != NodeStates.RUNNING:
......
......@@ -609,10 +609,6 @@ class NEOCluster(object):
self.expectStorageState(process.getUUID(), NodeStates.PENDING,
*args, **kw)
def expectUnknown(self, process, *args, **kw):
self.expectStorageState(process.getUUID(), NodeStates.UNKNOWN,
*args, **kw)
def expectUnavailable(self, process, *args, **kw):
self.expectStorageState(process.getUUID(),
NodeStates.TEMPORARILY_DOWN, *args, **kw)
......@@ -679,7 +675,6 @@ class NEOCluster(object):
self.expectCondition(callback, *args, **kw)
def expectStorageNotKnown(self, process, *args, **kw):
# /!\ Not Known != Unknown
process_uuid = process.getUUID()
def expected_storage_not_known(last_try):
for storage in self.getStorageList():
......
......@@ -48,7 +48,7 @@ class ClusterTests(NEOFunctionalTest):
neo.stop()
neo.run(except_storages=(s2, ))
neo.expectPending(s1)
neo.expectUnknown(s2)
neo.expectUnavailable(s2)
neo.expectClusterRecovering()
# Starting missing storage allows cluster to exit Recovery without
# neoctl action.
......@@ -61,11 +61,11 @@ class ClusterTests(NEOFunctionalTest):
neo.stop()
neo.run(except_storages=(s2, ))
neo.expectPending(s1)
neo.expectUnknown(s2)
neo.expectUnavailable(s2)
neo.expectClusterRecovering()
neo.startCluster()
neo.expectRunning(s1)
neo.expectUnknown(s2)
neo.expectUnavailable(s2)
neo.expectClusterRunning()
def testClusterBreaks(self):
......
......@@ -59,7 +59,7 @@ class MasterTests(NEOFunctionalTest):
self.assertEqual(len(killed_uuid_list), 1)
uuid = killed_uuid_list[0]
# Check the state of the primary we just killed
self.neo.expectMasterState(uuid, (None, NodeStates.UNKNOWN))
self.neo.expectMasterState(uuid, (None, NodeStates.TEMPORARILY_DOWN))
# BUG: The following check expects neoctl to reconnect before
# the election finishes.
self.assertEqual(self.neo.getPrimary(), None)
......
......@@ -409,7 +409,7 @@ class StorageTests(NEOFunctionalTest):
# restart the cluster with the first storage killed
self.neo.run(except_storages=[started[1]])
self.neo.expectPending(started[0])
self.neo.expectUnknown(started[1])
self.neo.expectUnavailable(started[1])
self.neo.expectClusterRecovering()
# Cluster doesn't know there are outdated cells
self.neo.expectOudatedCells(number=0)
......
......@@ -35,7 +35,7 @@ class NodesTests(NeoUnitTestBase):
address = ('127.0.0.1', 10000)
uuid = self.getNewUUID(None)
node = Node(self.nm, address=address, uuid=uuid)
self.assertEqual(node.getState(), NodeStates.UNKNOWN)
self.assertEqual(node.getState(), NodeStates.TEMPORARILY_DOWN)
self.assertEqual(node.getAddress(), address)
self.assertEqual(node.getUUID(), uuid)
self.assertTrue(time() - 1 < node.getLastStateChange() < time())
......@@ -43,7 +43,7 @@ class NodesTests(NeoUnitTestBase):
def testState(self):
""" Check if the last changed time is updated when state is changed """
node = Node(self.nm)
self.assertEqual(node.getState(), NodeStates.UNKNOWN)
self.assertEqual(node.getState(), NodeStates.TEMPORARILY_DOWN)
self.assertTrue(time() - 1 < node.getLastStateChange() < time())
previous_time = node.getLastStateChange()
node.setState(NodeStates.RUNNING)
......@@ -161,7 +161,7 @@ class NodeManagerTests(NeoUnitTestBase):
(NodeTypes.STORAGE, self.storage.getAddress(), new_uuid,
NodeStates.RUNNING, None),
(NodeTypes.ADMIN, self.admin.getAddress(), self.admin.getUUID(),
NodeStates.UNKNOWN, None),
NodeStates.TEMPORARILY_DOWN, None),
)
app = Mock()
app.pt = Mock({'dropNode': True})
......@@ -180,9 +180,9 @@ class NodeManagerTests(NeoUnitTestBase):
new_storage = storage_list[0]
self.assertNotEqual(new_storage.getUUID(), old_uuid)
self.assertEqual(new_storage.getState(), NodeStates.RUNNING)
# admin is still here but in UNKNOWN state
# admin is still here but in TEMPORARILY_DOWN state
self.checkNodes([self.master, self.admin, new_storage])
self.assertEqual(self.admin.getState(), NodeStates.UNKNOWN)
self.assertEqual(self.admin.getState(), NodeStates.TEMPORARILY_DOWN)
class MasterDBTests(NeoUnitTestBase):
......
......@@ -34,7 +34,7 @@ class PartitionTableTests(NeoUnitTestBase):
# check getter
self.assertEqual(cell.getNode(), sn)
self.assertEqual(cell.getState(), CellStates.OUT_OF_DATE)
self.assertEqual(cell.getNodeState(), NodeStates.UNKNOWN)
self.assertEqual(cell.getNodeState(), NodeStates.TEMPORARILY_DOWN)
self.assertEqual(cell.getUUID(), uuid)
self.assertEqual(cell.getAddress(), server)
# check state setter
......
......@@ -552,7 +552,8 @@ class Test(NEOThreadedTest):
# restart it with one storage only
if 1:
cluster.start(storage_list=(s1,))
self.assertEqual(NodeStates.UNKNOWN, cluster.getNodeState(s2))
self.assertEqual(NodeStates.TEMPORARILY_DOWN,
cluster.getNodeState(s2))
@with_cluster(storage_count=2, partitions=2, replicas=1)
def testRestartStoragesWithReplicas(self, cluster):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment