Commit 9d7f9795 authored by Julien Muchembled

Remove BROKEN node state

parent b8210d58
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
- Clarify node state signification, and consider renaming them in the code. - Clarify node state signification, and consider renaming them in the code.
Ideas: Ideas:
TEMPORARILY_DOWN becomes UNAVAILABLE TEMPORARILY_DOWN becomes UNAVAILABLE
BROKEN is removed ?
- Clarify the use of each error codes: - Clarify the use of each error codes:
- NOT_READY removed (connection kept opened until ready) - NOT_READY removed (connection kept opened until ready)
- Split PROTOCOL_ERROR (BAD IDENTIFICATION, ...) - Split PROTOCOL_ERROR (BAD IDENTIFICATION, ...)
......
...@@ -99,7 +99,6 @@ class ConnectionPool(object): ...@@ -99,7 +99,6 @@ class ConnectionPool(object):
return conn return conn
def removeConnection(self, node): def removeConnection(self, node):
"""Explicitly remove connection when a node is broken."""
self.connection_dict.pop(node.getUUID(), None) self.connection_dict.pop(node.getUUID(), None)
def closeAll(self): def closeAll(self):
......
...@@ -141,7 +141,6 @@ class HandlerSwitcher(object): ...@@ -141,7 +141,6 @@ class HandlerSwitcher(object):
connection.send(Packets.Notify( connection.send(Packets.Notify(
'Unexpected answer: %r' % packet)) 'Unexpected answer: %r' % packet))
connection.abort() connection.abort()
# handler.peerBroken(connection)
finally: finally:
# apply a pending handler if no more answers are pending # apply a pending handler if no more answers are pending
while len(pending) > 1 and not pending[0][0]: while len(pending) > 1 and not pending[0][0]:
......
...@@ -20,9 +20,8 @@ from operator import itemgetter ...@@ -20,9 +20,8 @@ from operator import itemgetter
from . import logging from . import logging
from .connection import ConnectionClosed from .connection import ConnectionClosed
from .protocol import ( from .protocol import (
NodeStates, Packets, Errors, BackendNotImplemented, NodeStates, Packets, Errors, BackendNotImplemented, NonReadableCell,
BrokenNodeDisallowedError, NonReadableCell, NotReadyError, NotReadyError, PacketMalformedError, ProtocolError, UnexpectedPacketError)
PacketMalformedError, ProtocolError, UnexpectedPacketError)
from .util import cached_property from .util import cached_property
...@@ -59,7 +58,6 @@ class EventHandler(object): ...@@ -59,7 +58,6 @@ class EventHandler(object):
logging.error(message) logging.error(message)
conn.answer(Errors.ProtocolError(message)) conn.answer(Errors.ProtocolError(message))
conn.abort() conn.abort()
# self.peerBroken(conn)
def dispatch(self, conn, packet, kw={}): def dispatch(self, conn, packet, kw={}):
"""This is a helper method to handle various packet types.""" """This is a helper method to handle various packet types."""
...@@ -80,11 +78,6 @@ class EventHandler(object): ...@@ -80,11 +78,6 @@ class EventHandler(object):
except PacketMalformedError, e: except PacketMalformedError, e:
logging.error('malformed packet from %r: %s', conn, e) logging.error('malformed packet from %r: %s', conn, e)
conn.close() conn.close()
# self.peerBroken(conn)
except BrokenNodeDisallowedError:
if not conn.isClosed():
conn.answer(Errors.BrokenNode('go away'))
conn.abort()
except NotReadyError, message: except NotReadyError, message:
if not conn.isClosed(): if not conn.isClosed():
if not message.args: if not message.args:
...@@ -146,11 +139,6 @@ class EventHandler(object): ...@@ -146,11 +139,6 @@ class EventHandler(object):
logging.debug('connection closed for %r', conn) logging.debug('connection closed for %r', conn)
self.connectionLost(conn, NodeStates.TEMPORARILY_DOWN) self.connectionLost(conn, NodeStates.TEMPORARILY_DOWN)
#def peerBroken(self, conn):
# """Called when a peer is broken."""
# logging.error('%r is broken', conn)
# # NodeStates.BROKEN
def connectionLost(self, conn, new_state): def connectionLost(self, conn, new_state):
""" this is a method to override in sub-handlers when there is no need """ this is a method to override in sub-handlers when there is no need
to make distinction from the kind event that closed the connection """ to make distinction from the kind event that closed the connection """
...@@ -216,9 +204,6 @@ class EventHandler(object): ...@@ -216,9 +204,6 @@ class EventHandler(object):
def timeoutError(self, conn, message): def timeoutError(self, conn, message):
logging.error('timeout error: %s', message) logging.error('timeout error: %s', message)
def brokenNodeDisallowedError(self, conn, message):
raise RuntimeError, 'broken node disallowed error: %s' % (message,)
def ack(self, conn, message): def ack(self, conn, message):
logging.debug("no error message: %s", message) logging.debug("no error message: %s", message)
...@@ -268,7 +253,6 @@ class AnswerBaseHandler(EventHandler): ...@@ -268,7 +253,6 @@ class AnswerBaseHandler(EventHandler):
timeoutExpired = unexpectedInAnswerHandler timeoutExpired = unexpectedInAnswerHandler
connectionClosed = unexpectedInAnswerHandler connectionClosed = unexpectedInAnswerHandler
packetReceived = unexpectedInAnswerHandler packetReceived = unexpectedInAnswerHandler
peerBroken = unexpectedInAnswerHandler
protocolError = unexpectedInAnswerHandler protocolError = unexpectedInAnswerHandler
def acceptIdentification(*args): def acceptIdentification(*args):
......
...@@ -70,7 +70,6 @@ def ErrorCodes(): ...@@ -70,7 +70,6 @@ def ErrorCodes():
TID_NOT_FOUND TID_NOT_FOUND
OID_DOES_NOT_EXIST OID_DOES_NOT_EXIST
PROTOCOL_ERROR PROTOCOL_ERROR
BROKEN_NODE
REPLICATION_ERROR REPLICATION_ERROR
CHECKING_ERROR CHECKING_ERROR
BACKEND_NOT_IMPLEMENTED BACKEND_NOT_IMPLEMENTED
...@@ -123,7 +122,6 @@ def NodeStates(): ...@@ -123,7 +122,6 @@ def NodeStates():
RUNNING RUNNING
TEMPORARILY_DOWN TEMPORARILY_DOWN
DOWN DOWN
BROKEN
PENDING PENDING
UNKNOWN UNKNOWN
...@@ -151,7 +149,6 @@ node_state_prefix_dict = { ...@@ -151,7 +149,6 @@ node_state_prefix_dict = {
NodeStates.RUNNING: 'R', NodeStates.RUNNING: 'R',
NodeStates.TEMPORARILY_DOWN: 'T', NodeStates.TEMPORARILY_DOWN: 'T',
NodeStates.DOWN: 'D', NodeStates.DOWN: 'D',
NodeStates.BROKEN: 'B',
NodeStates.PENDING: 'P', NodeStates.PENDING: 'P',
NodeStates.UNKNOWN: 'U', NodeStates.UNKNOWN: 'U',
} }
...@@ -201,17 +198,14 @@ class ProtocolError(Exception): ...@@ -201,17 +198,14 @@ class ProtocolError(Exception):
""" Base class for protocol errors, close the connection """ """ Base class for protocol errors, close the connection """
class PacketMalformedError(ProtocolError): class PacketMalformedError(ProtocolError):
""" Close the connection and set the node as broken""" """Close the connection"""
class UnexpectedPacketError(ProtocolError): class UnexpectedPacketError(ProtocolError):
""" Close the connection and set the node as broken""" """Close the connection"""
class NotReadyError(ProtocolError): class NotReadyError(ProtocolError):
""" Just close the connection """ """ Just close the connection """
class BrokenNodeDisallowedError(ProtocolError):
""" Just close the connection """
class BackendNotImplemented(Exception): class BackendNotImplemented(Exception):
""" Method not implemented by backend storage """ """ Method not implemented by backend storage """
......
...@@ -168,7 +168,7 @@ class PartitionTable(object): ...@@ -168,7 +168,7 @@ class PartitionTable(object):
def _setCell(self, offset, node, state): def _setCell(self, offset, node, state):
if state == CellStates.DISCARDED: if state == CellStates.DISCARDED:
return self.removeCell(offset, node) return self.removeCell(offset, node)
if node.isBroken() or node.isDown(): if node.isDown():
raise PartitionTableException('Invalid node state') raise PartitionTableException('Invalid node state')
self.count_dict.setdefault(node, 0) self.count_dict.setdefault(node, 0)
......
...@@ -19,7 +19,7 @@ from neo.lib import logging ...@@ -19,7 +19,7 @@ from neo.lib import logging
from neo.lib.exception import StoppedOperation from neo.lib.exception import StoppedOperation
from neo.lib.handler import EventHandler from neo.lib.handler import EventHandler
from neo.lib.protocol import (uuid_str, NodeTypes, NodeStates, Packets, from neo.lib.protocol import (uuid_str, NodeTypes, NodeStates, Packets,
BrokenNodeDisallowedError, ProtocolError, ProtocolError,
) )
class MasterHandler(EventHandler): class MasterHandler(EventHandler):
...@@ -37,8 +37,6 @@ class MasterHandler(EventHandler): ...@@ -37,8 +37,6 @@ class MasterHandler(EventHandler):
if node_type is NodeTypes.MASTER and not ( if node_type is NodeTypes.MASTER and not (
None != address == node.getAddress()): None != address == node.getAddress()):
raise ProtocolError raise ProtocolError
if node.isBroken():
raise BrokenNodeDisallowedError
peer_uuid = self._setupNode(conn, node_type, uuid, address, node) peer_uuid = self._setupNode(conn, node_type, uuid, address, node)
if app.primary: if app.primary:
primary_address = app.server primary_address = app.server
...@@ -49,8 +47,6 @@ class MasterHandler(EventHandler): ...@@ -49,8 +47,6 @@ class MasterHandler(EventHandler):
known_master_list = [] known_master_list = []
for n in app.nm.getMasterList(): for n in app.nm.getMasterList():
if n.isBroken():
continue
known_master_list.append((n.getAddress(), n.getUUID())) known_master_list.append((n.getAddress(), n.getUUID()))
conn.answer(Packets.AcceptIdentification( conn.answer(Packets.AcceptIdentification(
NodeTypes.MASTER, NodeTypes.MASTER,
...@@ -113,17 +109,13 @@ class BaseServiceHandler(MasterHandler): ...@@ -113,17 +109,13 @@ class BaseServiceHandler(MasterHandler):
return # for example, when a storage is removed by an admin return # for example, when a storage is removed by an admin
assert node.isStorage(), node assert node.isStorage(), node
logging.info('storage node lost') logging.info('storage node lost')
if new_state != NodeStates.BROKEN: new_state = DISCONNECTED_STATE_DICT.get(node.getType(), NodeStates.DOWN)
new_state = DISCONNECTED_STATE_DICT.get(node.getType(),
NodeStates.DOWN)
assert new_state in (NodeStates.TEMPORARILY_DOWN, NodeStates.DOWN,
NodeStates.BROKEN), new_state
assert node.getState() not in (NodeStates.TEMPORARILY_DOWN, assert node.getState() not in (NodeStates.TEMPORARILY_DOWN,
NodeStates.DOWN, NodeStates.BROKEN), (uuid_str(self.app.uuid), NodeStates.DOWN), (uuid_str(self.app.uuid),
node.whoSetState(), new_state) node.whoSetState(), new_state)
was_pending = node.isPending() was_pending = node.isPending()
node.setState(new_state) node.setState(new_state)
if new_state != NodeStates.BROKEN and was_pending: if was_pending:
# was in pending state, so drop it from the node manager to forget # was in pending state, so drop it from the node manager to forget
# it and do not set in running state when it comes back # it and do not set in running state when it comes back
logging.info('drop a pending node from the node manager') logging.info('drop a pending node from the node manager')
......
...@@ -61,7 +61,6 @@ UNIT_TEST_MODULES = [ ...@@ -61,7 +61,6 @@ UNIT_TEST_MODULES = [
'neo.tests.storage.testMasterHandler', 'neo.tests.storage.testMasterHandler',
'neo.tests.storage.testStorageApp', 'neo.tests.storage.testStorageApp',
'neo.tests.storage.testStorage' + os.getenv('NEO_TESTS_ADAPTER', 'SQLite'), 'neo.tests.storage.testStorage' + os.getenv('NEO_TESTS_ADAPTER', 'SQLite'),
'neo.tests.storage.testIdentificationHandler',
'neo.tests.storage.testTransactions', 'neo.tests.storage.testTransactions',
# client application # client application
'neo.tests.client.testClientApp', 'neo.tests.client.testClientApp',
......
...@@ -57,7 +57,7 @@ class BaseMasterHandler(BaseHandler): ...@@ -57,7 +57,7 @@ class BaseMasterHandler(BaseHandler):
# This is me, do what the master tell me # This is me, do what the master tell me
logging.info("I was told I'm %s", state) logging.info("I was told I'm %s", state)
if state in (NodeStates.DOWN, NodeStates.TEMPORARILY_DOWN, if state in (NodeStates.DOWN, NodeStates.TEMPORARILY_DOWN,
NodeStates.BROKEN, NodeStates.UNKNOWN): NodeStates.UNKNOWN):
erase = state == NodeStates.DOWN erase = state == NodeStates.DOWN
self.app.shutdown(erase=erase) self.app.shutdown(erase=erase)
elif node_type == NodeTypes.CLIENT and state != NodeStates.RUNNING: elif node_type == NodeTypes.CLIENT and state != NodeStates.RUNNING:
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
from neo.lib import logging from neo.lib import logging
from neo.lib.handler import EventHandler from neo.lib.handler import EventHandler
from neo.lib.protocol import NodeTypes, NotReadyError, Packets from neo.lib.protocol import NodeTypes, NotReadyError, Packets
from neo.lib.protocol import ProtocolError, BrokenNodeDisallowedError from neo.lib.protocol import ProtocolError
from .storage import StorageOperationHandler from .storage import StorageOperationHandler
from .client import ClientOperationHandler, ClientReadOnlyOperationHandler from .client import ClientOperationHandler, ClientReadOnlyOperationHandler
...@@ -47,8 +47,6 @@ class IdentificationHandler(EventHandler): ...@@ -47,8 +47,6 @@ class IdentificationHandler(EventHandler):
if uuid == app.uuid: if uuid == app.uuid:
raise ProtocolError("uuid conflict or loopback connection") raise ProtocolError("uuid conflict or loopback connection")
node = app.nm.getByUUID(uuid, id_timestamp) node = app.nm.getByUUID(uuid, id_timestamp)
if node.isBroken():
raise BrokenNodeDisallowedError
# choose the handler according to the node type # choose the handler according to the node type
if node_type == NodeTypes.CLIENT: if node_type == NodeTypes.CLIENT:
if app.dm.getBackupTID(): if app.dm.getBackupTID():
......
...@@ -219,15 +219,6 @@ class MasterServerElectionTests(MasterClientElectionTestBase): ...@@ -219,15 +219,6 @@ class MasterServerElectionTests(MasterClientElectionTestBase):
self.election.requestIdentification, self.election.requestIdentification,
conn, NodeTypes.CLIENT, *args) conn, NodeTypes.CLIENT, *args)
def test_requestIdentification3(self):
""" A broken master node request identification """
node, conn = self.identifyToMasterNode()
node.setBroken()
args = node.getUUID(), node.getAddress(), self.app.name, None
self.assertRaises(protocol.BrokenNodeDisallowedError,
self.election.requestIdentification,
conn, NodeTypes.MASTER, *args)
def test_requestIdentification4(self): def test_requestIdentification4(self):
""" No conflict """ """ No conflict """
node, conn = self.identifyToMasterNode() node, conn = self.identifyToMasterNode()
......
...@@ -63,11 +63,8 @@ class MasterPartitionTableTests(NeoUnitTestBase): ...@@ -63,11 +63,8 @@ class MasterPartitionTableTests(NeoUnitTestBase):
uuid4 = self.getStorageUUID() uuid4 = self.getStorageUUID()
server4 = ("127.0.0.4", 19004) server4 = ("127.0.0.4", 19004)
sn4 = self.createStorage(server4, uuid4) sn4 = self.createStorage(server4, uuid4)
uuid5 = self.getStorageUUID()
server5 = ("127.0.0.5", 19005)
sn5 = self.createStorage(server5, uuid5)
# create partition table # create partition table
num_partitions = 5 num_partitions = 4
num_replicas = 3 num_replicas = 3
pt = PartitionTable(num_partitions, num_replicas) pt = PartitionTable(num_partitions, num_replicas)
pt._setCell(0, sn1, CellStates.OUT_OF_DATE) pt._setCell(0, sn1, CellStates.OUT_OF_DATE)
...@@ -77,15 +74,13 @@ class MasterPartitionTableTests(NeoUnitTestBase): ...@@ -77,15 +74,13 @@ class MasterPartitionTableTests(NeoUnitTestBase):
pt._setCell(2, sn3, CellStates.UP_TO_DATE) pt._setCell(2, sn3, CellStates.UP_TO_DATE)
sn3.setState(NodeStates.DOWN) sn3.setState(NodeStates.DOWN)
pt._setCell(3, sn4, CellStates.UP_TO_DATE) pt._setCell(3, sn4, CellStates.UP_TO_DATE)
sn4.setState(NodeStates.BROKEN) sn4.setState(NodeStates.RUNNING)
pt._setCell(4, sn5, CellStates.UP_TO_DATE)
sn5.setState(NodeStates.RUNNING)
# outdate nodes # outdate nodes
cells_outdated = pt.outdate() cells_outdated = pt.outdate()
self.assertEqual(len(cells_outdated), 3) self.assertEqual(len(cells_outdated), 2)
for offset, uuid, state in cells_outdated: for offset, uuid, state in cells_outdated:
self.assertTrue(offset in (1, 2, 3)) self.assertIn(offset, (1, 2))
self.assertTrue(uuid in (uuid2, uuid3, uuid4)) self.assertIn(uuid, (uuid2, uuid3))
self.assertEqual(state, CellStates.OUT_OF_DATE) self.assertEqual(state, CellStates.OUT_OF_DATE)
# check each cell # check each cell
# part 1, already outdated # part 1, already outdated
...@@ -103,15 +98,10 @@ class MasterPartitionTableTests(NeoUnitTestBase): ...@@ -103,15 +98,10 @@ class MasterPartitionTableTests(NeoUnitTestBase):
self.assertEqual(len(cells), 1) self.assertEqual(len(cells), 1)
cell = cells[0] cell = cells[0]
self.assertEqual(cell.getState(), CellStates.OUT_OF_DATE) self.assertEqual(cell.getState(), CellStates.OUT_OF_DATE)
# part 4, already outdated # part 4, remains running
cells = pt.getCellList(3) cells = pt.getCellList(3)
self.assertEqual(len(cells), 1) self.assertEqual(len(cells), 1)
cell = cells[0] cell = cells[0]
self.assertEqual(cell.getState(), CellStates.OUT_OF_DATE)
# part 5, remains running
cells = pt.getCellList(4)
self.assertEqual(len(cells), 1)
cell = cells[0]
self.assertEqual(cell.getState(), CellStates.UP_TO_DATE) self.assertEqual(cell.getState(), CellStates.UP_TO_DATE)
def test_15_dropNodeList(self): def test_15_dropNodeList(self):
......
#
# Copyright (C) 2009-2017 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import unittest
from .. import NeoUnitTestBase
from neo.lib.protocol import NodeTypes, BrokenNodeDisallowedError
from neo.lib.pt import PartitionTable
from neo.storage.app import Application
from neo.storage.handlers.identification import IdentificationHandler
class StorageIdentificationHandlerTests(NeoUnitTestBase):
    """Unit tests for the storage IdentificationHandler."""

    def setUp(self):
        """Create a storage Application and the handler under test."""
        super(StorageIdentificationHandlerTests, self).setUp()
        self.app = Application(self.getStorageConfiguration(master_number=1))
        app = self.app
        app.name = 'NEO'
        app.operational = True
        app.pt = PartitionTable(4, 1)
        self.identification = IdentificationHandler(app)

    def _tearDown(self, success):
        """Close the application and drop it before the base teardown."""
        self.app.close()
        # Remove the attribute so that the base class teardown no longer
        # sees the application on this test instance.
        del self.app
        super(StorageIdentificationHandlerTests, self)._tearDown(success)

    def test_requestIdentification3(self):
        """A client node marked as broken must be refused."""
        client_uuid = self.getClientUUID()
        fake_conn = self.getFakeConnection(uuid=client_uuid)
        broken_client = self.app.nm.createClient(uuid=client_uuid)
        broken_client.setBroken()
        # Positional arguments forwarded to requestIdentification().
        request_args = (
            fake_conn,
            NodeTypes.CLIENT,
            client_uuid,
            None,
            self.app.name,
            None,
        )
        self.assertRaises(
            BrokenNodeDisallowedError,
            self.identification.requestIdentification,
            *request_args
        )
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
...@@ -19,7 +19,7 @@ from .mock import Mock ...@@ -19,7 +19,7 @@ from .mock import Mock
from . import NeoUnitTestBase from . import NeoUnitTestBase
from neo.lib.handler import EventHandler from neo.lib.handler import EventHandler
from neo.lib.protocol import PacketMalformedError, UnexpectedPacketError, \ from neo.lib.protocol import PacketMalformedError, UnexpectedPacketError, \
BrokenNodeDisallowedError, NotReadyError, ProtocolError NotReadyError, ProtocolError
class HandlerTests(NeoUnitTestBase): class HandlerTests(NeoUnitTestBase):
...@@ -60,14 +60,6 @@ class HandlerTests(NeoUnitTestBase): ...@@ -60,14 +60,6 @@ class HandlerTests(NeoUnitTestBase):
self.setFakeMethod(fake) self.setFakeMethod(fake)
self.handler.dispatch(conn, packet) self.handler.dispatch(conn, packet)
self.checkClosed(conn) self.checkClosed(conn)
# raise BrokenNodeDisallowedError
conn.mockCalledMethods = {}
def fake(c):
raise BrokenNodeDisallowedError
self.setFakeMethod(fake)
self.handler.dispatch(conn, packet)
self.checkErrorPacket(conn)
self.checkAborted(conn)
# raise NotReadyError # raise NotReadyError
conn.mockCalledMethods = {} conn.mockCalledMethods = {}
def fake(c): def fake(c):
......
...@@ -104,17 +104,11 @@ class PartitionTableTests(NeoUnitTestBase): ...@@ -104,17 +104,11 @@ class PartitionTableTests(NeoUnitTestBase):
else: else:
self.assertEqual(len(pt.partition_list[x]), 0) self.assertEqual(len(pt.partition_list[x]), 0)
# now add broken and down state, must not be taken into account # now add down state, must not be taken into account
pt._setCell(0, sn1, CellStates.DISCARDED) pt._setCell(0, sn1, CellStates.DISCARDED)
for x in xrange(num_partitions): for x in xrange(num_partitions):
self.assertEqual(len(pt.partition_list[x]), 0) self.assertEqual(len(pt.partition_list[x]), 0)
self.assertEqual(pt.count_dict[sn1], 0) self.assertEqual(pt.count_dict[sn1], 0)
sn1.setState(NodeStates.BROKEN)
self.assertRaises(PartitionTableException, pt._setCell,
0, sn1, CellStates.UP_TO_DATE)
for x in xrange(num_partitions):
self.assertEqual(len(pt.partition_list[x]), 0)
self.assertEqual(pt.count_dict[sn1], 0)
sn1.setState(NodeStates.DOWN) sn1.setState(NodeStates.DOWN)
self.assertRaises(PartitionTableException, pt._setCell, self.assertRaises(PartitionTableException, pt._setCell,
0, sn1, CellStates.UP_TO_DATE) 0, sn1, CellStates.UP_TO_DATE)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment