Commit 9d7f9795 authored by Julien Muchembled's avatar Julien Muchembled

Remove BROKEN node state

parent b8210d58
......@@ -2,7 +2,6 @@
- Clarify node state signification, and consider renaming them in the code.
Ideas:
TEMPORARILY_DOWN becomes UNAVAILABLE
BROKEN is removed ?
- Clarify the use of each error codes:
- NOT_READY removed (connection kept opened until ready)
- Split PROTOCOL_ERROR (BAD IDENTIFICATION, ...)
......
......@@ -99,7 +99,6 @@ class ConnectionPool(object):
return conn
def removeConnection(self, node):
"""Explicitly remove connection when a node is broken."""
self.connection_dict.pop(node.getUUID(), None)
def closeAll(self):
......
......@@ -141,7 +141,6 @@ class HandlerSwitcher(object):
connection.send(Packets.Notify(
'Unexpected answer: %r' % packet))
connection.abort()
# handler.peerBroken(connection)
finally:
# apply a pending handler if no more answers are pending
while len(pending) > 1 and not pending[0][0]:
......
......@@ -20,9 +20,8 @@ from operator import itemgetter
from . import logging
from .connection import ConnectionClosed
from .protocol import (
NodeStates, Packets, Errors, BackendNotImplemented,
BrokenNodeDisallowedError, NonReadableCell, NotReadyError,
PacketMalformedError, ProtocolError, UnexpectedPacketError)
NodeStates, Packets, Errors, BackendNotImplemented, NonReadableCell,
NotReadyError, PacketMalformedError, ProtocolError, UnexpectedPacketError)
from .util import cached_property
......@@ -59,7 +58,6 @@ class EventHandler(object):
logging.error(message)
conn.answer(Errors.ProtocolError(message))
conn.abort()
# self.peerBroken(conn)
def dispatch(self, conn, packet, kw={}):
"""This is a helper method to handle various packet types."""
......@@ -80,11 +78,6 @@ class EventHandler(object):
except PacketMalformedError, e:
logging.error('malformed packet from %r: %s', conn, e)
conn.close()
# self.peerBroken(conn)
except BrokenNodeDisallowedError:
if not conn.isClosed():
conn.answer(Errors.BrokenNode('go away'))
conn.abort()
except NotReadyError, message:
if not conn.isClosed():
if not message.args:
......@@ -146,11 +139,6 @@ class EventHandler(object):
logging.debug('connection closed for %r', conn)
self.connectionLost(conn, NodeStates.TEMPORARILY_DOWN)
#def peerBroken(self, conn):
# """Called when a peer is broken."""
# logging.error('%r is broken', conn)
# # NodeStates.BROKEN
def connectionLost(self, conn, new_state):
""" this is a method to override in sub-handlers when there is no need
to make distinction from the kind event that closed the connection """
......@@ -216,9 +204,6 @@ class EventHandler(object):
def timeoutError(self, conn, message):
logging.error('timeout error: %s', message)
def brokenNodeDisallowedError(self, conn, message):
raise RuntimeError, 'broken node disallowed error: %s' % (message,)
def ack(self, conn, message):
logging.debug("no error message: %s", message)
......@@ -268,7 +253,6 @@ class AnswerBaseHandler(EventHandler):
timeoutExpired = unexpectedInAnswerHandler
connectionClosed = unexpectedInAnswerHandler
packetReceived = unexpectedInAnswerHandler
peerBroken = unexpectedInAnswerHandler
protocolError = unexpectedInAnswerHandler
def acceptIdentification(*args):
......
......@@ -70,7 +70,6 @@ def ErrorCodes():
TID_NOT_FOUND
OID_DOES_NOT_EXIST
PROTOCOL_ERROR
BROKEN_NODE
REPLICATION_ERROR
CHECKING_ERROR
BACKEND_NOT_IMPLEMENTED
......@@ -123,7 +122,6 @@ def NodeStates():
RUNNING
TEMPORARILY_DOWN
DOWN
BROKEN
PENDING
UNKNOWN
......@@ -151,7 +149,6 @@ node_state_prefix_dict = {
NodeStates.RUNNING: 'R',
NodeStates.TEMPORARILY_DOWN: 'T',
NodeStates.DOWN: 'D',
NodeStates.BROKEN: 'B',
NodeStates.PENDING: 'P',
NodeStates.UNKNOWN: 'U',
}
......@@ -201,17 +198,14 @@ class ProtocolError(Exception):
""" Base class for protocol errors, close the connection """
class PacketMalformedError(ProtocolError):
""" Close the connection and set the node as broken"""
"""Close the connection"""
class UnexpectedPacketError(ProtocolError):
""" Close the connection and set the node as broken"""
"""Close the connection"""
class NotReadyError(ProtocolError):
""" Just close the connection """
class BrokenNodeDisallowedError(ProtocolError):
""" Just close the connection """
class BackendNotImplemented(Exception):
""" Method not implemented by backend storage """
......
......@@ -168,7 +168,7 @@ class PartitionTable(object):
def _setCell(self, offset, node, state):
if state == CellStates.DISCARDED:
return self.removeCell(offset, node)
if node.isBroken() or node.isDown():
if node.isDown():
raise PartitionTableException('Invalid node state')
self.count_dict.setdefault(node, 0)
......
......@@ -19,7 +19,7 @@ from neo.lib import logging
from neo.lib.exception import StoppedOperation
from neo.lib.handler import EventHandler
from neo.lib.protocol import (uuid_str, NodeTypes, NodeStates, Packets,
BrokenNodeDisallowedError, ProtocolError,
ProtocolError,
)
class MasterHandler(EventHandler):
......@@ -37,8 +37,6 @@ class MasterHandler(EventHandler):
if node_type is NodeTypes.MASTER and not (
None != address == node.getAddress()):
raise ProtocolError
if node.isBroken():
raise BrokenNodeDisallowedError
peer_uuid = self._setupNode(conn, node_type, uuid, address, node)
if app.primary:
primary_address = app.server
......@@ -49,8 +47,6 @@ class MasterHandler(EventHandler):
known_master_list = []
for n in app.nm.getMasterList():
if n.isBroken():
continue
known_master_list.append((n.getAddress(), n.getUUID()))
conn.answer(Packets.AcceptIdentification(
NodeTypes.MASTER,
......@@ -113,17 +109,13 @@ class BaseServiceHandler(MasterHandler):
return # for example, when a storage is removed by an admin
assert node.isStorage(), node
logging.info('storage node lost')
if new_state != NodeStates.BROKEN:
new_state = DISCONNECTED_STATE_DICT.get(node.getType(),
NodeStates.DOWN)
assert new_state in (NodeStates.TEMPORARILY_DOWN, NodeStates.DOWN,
NodeStates.BROKEN), new_state
new_state = DISCONNECTED_STATE_DICT.get(node.getType(), NodeStates.DOWN)
assert node.getState() not in (NodeStates.TEMPORARILY_DOWN,
NodeStates.DOWN, NodeStates.BROKEN), (uuid_str(self.app.uuid),
NodeStates.DOWN), (uuid_str(self.app.uuid),
node.whoSetState(), new_state)
was_pending = node.isPending()
node.setState(new_state)
if new_state != NodeStates.BROKEN and was_pending:
if was_pending:
# was in pending state, so drop it from the node manager to forget
# it and do not set in running state when it comes back
logging.info('drop a pending node from the node manager')
......
......@@ -61,7 +61,6 @@ UNIT_TEST_MODULES = [
'neo.tests.storage.testMasterHandler',
'neo.tests.storage.testStorageApp',
'neo.tests.storage.testStorage' + os.getenv('NEO_TESTS_ADAPTER', 'SQLite'),
'neo.tests.storage.testIdentificationHandler',
'neo.tests.storage.testTransactions',
# client application
'neo.tests.client.testClientApp',
......
......@@ -57,7 +57,7 @@ class BaseMasterHandler(BaseHandler):
# This is me, do what the master tell me
logging.info("I was told I'm %s", state)
if state in (NodeStates.DOWN, NodeStates.TEMPORARILY_DOWN,
NodeStates.BROKEN, NodeStates.UNKNOWN):
NodeStates.UNKNOWN):
erase = state == NodeStates.DOWN
self.app.shutdown(erase=erase)
elif node_type == NodeTypes.CLIENT and state != NodeStates.RUNNING:
......
......@@ -17,7 +17,7 @@
from neo.lib import logging
from neo.lib.handler import EventHandler
from neo.lib.protocol import NodeTypes, NotReadyError, Packets
from neo.lib.protocol import ProtocolError, BrokenNodeDisallowedError
from neo.lib.protocol import ProtocolError
from .storage import StorageOperationHandler
from .client import ClientOperationHandler, ClientReadOnlyOperationHandler
......@@ -47,8 +47,6 @@ class IdentificationHandler(EventHandler):
if uuid == app.uuid:
raise ProtocolError("uuid conflict or loopback connection")
node = app.nm.getByUUID(uuid, id_timestamp)
if node.isBroken():
raise BrokenNodeDisallowedError
# choose the handler according to the node type
if node_type == NodeTypes.CLIENT:
if app.dm.getBackupTID():
......
......@@ -219,15 +219,6 @@ class MasterServerElectionTests(MasterClientElectionTestBase):
self.election.requestIdentification,
conn, NodeTypes.CLIENT, *args)
def test_requestIdentification3(self):
""" A broken master node request identification """
node, conn = self.identifyToMasterNode()
node.setBroken()
args = node.getUUID(), node.getAddress(), self.app.name, None
self.assertRaises(protocol.BrokenNodeDisallowedError,
self.election.requestIdentification,
conn, NodeTypes.MASTER, *args)
def test_requestIdentification4(self):
""" No conflict """
node, conn = self.identifyToMasterNode()
......
......@@ -63,11 +63,8 @@ class MasterPartitionTableTests(NeoUnitTestBase):
uuid4 = self.getStorageUUID()
server4 = ("127.0.0.4", 19004)
sn4 = self.createStorage(server4, uuid4)
uuid5 = self.getStorageUUID()
server5 = ("127.0.0.5", 19005)
sn5 = self.createStorage(server5, uuid5)
# create partition table
num_partitions = 5
num_partitions = 4
num_replicas = 3
pt = PartitionTable(num_partitions, num_replicas)
pt._setCell(0, sn1, CellStates.OUT_OF_DATE)
......@@ -77,15 +74,13 @@ class MasterPartitionTableTests(NeoUnitTestBase):
pt._setCell(2, sn3, CellStates.UP_TO_DATE)
sn3.setState(NodeStates.DOWN)
pt._setCell(3, sn4, CellStates.UP_TO_DATE)
sn4.setState(NodeStates.BROKEN)
pt._setCell(4, sn5, CellStates.UP_TO_DATE)
sn5.setState(NodeStates.RUNNING)
sn4.setState(NodeStates.RUNNING)
# outdate nodes
cells_outdated = pt.outdate()
self.assertEqual(len(cells_outdated), 3)
self.assertEqual(len(cells_outdated), 2)
for offset, uuid, state in cells_outdated:
self.assertTrue(offset in (1, 2, 3))
self.assertTrue(uuid in (uuid2, uuid3, uuid4))
self.assertIn(offset, (1, 2))
self.assertIn(uuid, (uuid2, uuid3))
self.assertEqual(state, CellStates.OUT_OF_DATE)
# check each cell
# part 1, already outdated
......@@ -103,15 +98,10 @@ class MasterPartitionTableTests(NeoUnitTestBase):
self.assertEqual(len(cells), 1)
cell = cells[0]
self.assertEqual(cell.getState(), CellStates.OUT_OF_DATE)
# part 4, already outdated
# part 4, remains running
cells = pt.getCellList(3)
self.assertEqual(len(cells), 1)
cell = cells[0]
self.assertEqual(cell.getState(), CellStates.OUT_OF_DATE)
# part 5, remains running
cells = pt.getCellList(4)
self.assertEqual(len(cells), 1)
cell = cells[0]
self.assertEqual(cell.getState(), CellStates.UP_TO_DATE)
def test_15_dropNodeList(self):
......
#
# Copyright (C) 2009-2017 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import unittest
from .. import NeoUnitTestBase
from neo.lib.protocol import NodeTypes, BrokenNodeDisallowedError
from neo.lib.pt import PartitionTable
from neo.storage.app import Application
from neo.storage.handlers.identification import IdentificationHandler
class StorageIdentificationHandlerTests(NeoUnitTestBase):
def setUp(self):
NeoUnitTestBase.setUp(self)
config = self.getStorageConfiguration(master_number=1)
self.app = Application(config)
self.app.name = 'NEO'
self.app.operational = True
self.app.pt = PartitionTable(4, 1)
self.identification = IdentificationHandler(self.app)
def _tearDown(self, success):
self.app.close()
del self.app
super(StorageIdentificationHandlerTests, self)._tearDown(success)
def test_requestIdentification3(self):
""" broken nodes must be rejected """
uuid = self.getClientUUID()
conn = self.getFakeConnection(uuid=uuid)
node = self.app.nm.createClient(uuid=uuid)
node.setBroken()
self.assertRaises(BrokenNodeDisallowedError,
self.identification.requestIdentification,
conn,
NodeTypes.CLIENT,
uuid,
None,
self.app.name,
None,
)
if __name__ == "__main__":
unittest.main()
......@@ -19,7 +19,7 @@ from .mock import Mock
from . import NeoUnitTestBase
from neo.lib.handler import EventHandler
from neo.lib.protocol import PacketMalformedError, UnexpectedPacketError, \
BrokenNodeDisallowedError, NotReadyError, ProtocolError
NotReadyError, ProtocolError
class HandlerTests(NeoUnitTestBase):
......@@ -60,14 +60,6 @@ class HandlerTests(NeoUnitTestBase):
self.setFakeMethod(fake)
self.handler.dispatch(conn, packet)
self.checkClosed(conn)
# raise BrokenNodeDisallowedError
conn.mockCalledMethods = {}
def fake(c):
raise BrokenNodeDisallowedError
self.setFakeMethod(fake)
self.handler.dispatch(conn, packet)
self.checkErrorPacket(conn)
self.checkAborted(conn)
# raise NotReadyError
conn.mockCalledMethods = {}
def fake(c):
......
......@@ -104,17 +104,11 @@ class PartitionTableTests(NeoUnitTestBase):
else:
self.assertEqual(len(pt.partition_list[x]), 0)
# now add broken and down state, must not be taken into account
# now add down state, must not be taken into account
pt._setCell(0, sn1, CellStates.DISCARDED)
for x in xrange(num_partitions):
self.assertEqual(len(pt.partition_list[x]), 0)
self.assertEqual(pt.count_dict[sn1], 0)
sn1.setState(NodeStates.BROKEN)
self.assertRaises(PartitionTableException, pt._setCell,
0, sn1, CellStates.UP_TO_DATE)
for x in xrange(num_partitions):
self.assertEqual(len(pt.partition_list[x]), 0)
self.assertEqual(pt.count_dict[sn1], 0)
sn1.setState(NodeStates.DOWN)
self.assertRaises(PartitionTableException, pt._setCell,
0, sn1, CellStates.UP_TO_DATE)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment