Commit 7b0e997f authored by Grégory Wisniewski's avatar Grégory Wisniewski

Splity master node handlers:

- All incoming connections start with the identification handler which will
apply the right handler depending on the current cluster state and the node
type. So we are now sure that a node is well identified, got an UUID and is
registered into the node manager when it leaves the identification handler.
- During service, clietnt and storage nodes have their own handler


git-svn-id: https://svn.erp5.org/repos/neo/branches/prototype3@686 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent 5088acb2
This diff is collapsed.
......@@ -30,29 +30,6 @@ class MasterEventHandler(EventHandler):
self.app = app
EventHandler.__init__(self)
def acceptNodeIdentification(self, conn, packet, uuid):
""" Send a packet to accept the node identification """
app = self.app
args = (protocol.MASTER_NODE_TYPE, app.uuid,
app.server[0], app.server[1],
app.pt.getPartitions(), app.pt.getReplicas(),
uuid)
p = protocol.acceptNodeIdentification(*args)
conn.answer(p, packet)
def registerAdminNode(self, conn, packet, uuid, server):
""" Register the connection's peer as an admin node """
from neo.master.administration import AdministrationEventHandler
from neo.node import AdminNode
node = self.app.nm.getNodeByUUID(uuid)
if node is None:
uuid = self.app.getNewUUID(protocol.ADMIN_NODE_TYPE)
self.app.nm.add(AdminNode(uuid=uuid, server=server))
conn.setUUID(uuid)
conn.setHandler(AdministrationEventHandler(self.app))
logging.info('Register admin node %s' % util.dump(uuid))
self.acceptNodeIdentification(conn, packet, uuid)
def handleRequestNodeIdentification(self, conn, packet, node_type,
uuid, ip_address, port, name):
raise NotImplementedError('this method must be overridden')
......
#
# Copyright (C) 2006-2009 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import logging
from neo import protocol
from neo.node import AdminNode, MasterNode, ClientNode, StorageNode
from neo.master.handler import MasterEventHandler
from neo import decorators
class IdentificationEventHandler(MasterEventHandler):
"""This class deals with messages from the admin node only"""
def connectionClosed(self, conn):
logging.warning('lost a node in IdentificationEventHandler')
def timeoutExpired(self, conn):
logging.warning('lost a node in IdentificationEventHandler')
def peerBroken(self, conn):
logging.warning('lost a node in IdentificationEventHandler')
# TODO: move this into a new handler
@decorators.identification_required
def handleAnnouncePrimaryMaster(self, conn, packet):
uuid = conn.getUUID()
# I am also the primary... So restart the election.
raise ElectionFailure, 'another primary arises'
def handleReelectPrimaryMaster(self, conn, packet):
raise ElectionFailure, 'reelection requested'
def handleNotifyNodeInformation(self, conn, packet, node_list):
# XXX: Secondary master can send this packet
logging.error('ignoring NotifyNodeInformation packet')
def handleRequestNodeIdentification(self, conn, packet, node_type,
uuid, ip_address, port, name):
# TODO: handle broken nodes
self.checkClusterName(name)
app, nm = self.app, self.app.nm
server = (ip_address, port)
node_by_uuid = nm.getNodeByUUID(uuid)
node_by_addr = nm.getNodeByServer(server)
if node_by_uuid is not None and node_by_addr is not None and \
node_by_uuid is not node_by_addr:
# got a conflict, but UUIDs should be more reliable
# TODO: delete the old node...
raise RuntimeError('node conflict not implemented yet')
pass
node = node_by_uuid or node_by_addr
if node is not None and node.getServer() != server:
# address changed
# TODO: delete or update the old node ?
node.setServer(server)
raise RuntimeError('node address changement not implemented yet')
# ask the app the node identification, if refused, an exception is raised
result = self.app.identifyNode(node_type, uuid, node)
(uuid, node, state, handler, klass) = result
if uuid == protocol.INVALID_UUID:
# no valid uuid, give it one
uuid = app.getNewUUID(node_type)
if node is None:
# new node
node = klass(uuid=uuid, server=(ip_address, port))
app.nm.add(node)
handler = handler(self.app)
# set up the node
node.setUUID(uuid)
node.setState(state)
# set up the connection
conn.setUUID(uuid)
conn.setHandler(handler)
# XXX: Here we could bin conn and node together
# answer
args = (protocol.MASTER_NODE_TYPE, app.uuid, app.server[0], app.server[1],
app.pt.getPartitions(), app.pt.getReplicas(), uuid)
conn.answer(protocol.acceptNodeIdentification(*args), packet)
# trigger the event
handler.connectionCompleted(conn)
app.broadcastNodeInformation(node)
......@@ -26,161 +26,42 @@ from neo.exception import ElectionFailure
from neo.protocol import Packet, UnexpectedPacketError, INVALID_UUID, INVALID_PTID
from neo.node import ClientNode, StorageNode, MasterNode, AdminNode
from neo.util import dump
from neo import decorators
class RecoveryEventHandler(MasterEventHandler):
"""This class deals with events for a recovery phase."""
def connectionCompleted(self, conn):
# ask the last IDs to perform the the recovery
conn.ask(protocol.askLastIDs())
def connectionClosed(self, conn):
app = self.app
uuid = conn.getUUID()
if uuid is not None:
app = self.app
node = app.nm.getNodeByUUID(uuid)
if node.getState() == RUNNING_STATE:
node.setState(TEMPORARILY_DOWN_STATE)
app.broadcastNodeInformation(node)
node = app.nm.getNodeByUUID(uuid)
if node.getState() == RUNNING_STATE:
node.setState(TEMPORARILY_DOWN_STATE)
app.broadcastNodeInformation(node)
MasterEventHandler.connectionClosed(self, conn)
def timeoutExpired(self, conn):
app = self.app
uuid = conn.getUUID()
if uuid is not None:
app = self.app
node = app.nm.getNodeByUUID(uuid)
if node.getState() == RUNNING_STATE:
node.setState(TEMPORARILY_DOWN_STATE)
app.broadcastNodeInformation(node)
node = app.nm.getNodeByUUID(uuid)
if node.getState() == RUNNING_STATE:
node.setState(TEMPORARILY_DOWN_STATE)
app.broadcastNodeInformation(node)
MasterEventHandler.timeoutExpired(self, conn)
def peerBroken(self, conn):
uuid = conn.getUUID()
if uuid is not None:
app = self.app
node = app.nm.getNodeByUUID(uuid)
if node.getState() != BROKEN_STATE:
node.setState(BROKEN_STATE)
app.broadcastNodeInformation(node)
MasterEventHandler.peerBroken(self, conn)
def packetReceived(self, conn, packet):
MasterEventHandler.packetReceived(self, conn, packet)
def handleRequestNodeIdentification(self, conn, packet, node_type, uuid,
ip_address, port, name):
self.checkClusterName(name)
app = self.app
addr = (ip_address, port)
if node_type == ADMIN_NODE_TYPE:
self.registerAdminNode(conn, packet, uuid, addr)
return
if node_type not in (MASTER_NODE_TYPE, STORAGE_NODE_TYPE):
logging.info('reject a connection from a client')
raise protocol.NotReadyError
if node_type is STORAGE_NODE_TYPE and uuid is INVALID_UUID:
# refuse an empty storage node (with no UUID) to avoid potential
# UUID conflict
logging.info('reject empty storage node')
raise protocol.NotReadyError
# Here are many situations. In principle, a node should be identified by
# an UUID, since an UUID never change when moving a storage node to a different
# server, and an UUID always changes for a master node and a client node whenever
# it restarts, so more reliable than a server address.
#
# However, master nodes can be known only as the server addresses. And, a node
# may claim a server address used by another node.
node = app.nm.getNodeByUUID(uuid)
if not app.isValidUUID(uuid, addr):
# Here we have an UUID conflict, assume that's a new node
node = None
else:
# First, get the node by the UUID.
node = app.nm.getNodeByUUID(uuid)
if node is None:
# generate an uuid for this node
while not app.isValidUUID(uuid, addr):
uuid = app.getNewUUID(node_type)
# If nothing is present, try with the server address.
node = app.nm.getNodeByServer(addr)
if node is None:
# Nothing is found. So this must be the first time that this node
# connected to me.
if node_type == MASTER_NODE_TYPE:
node = MasterNode(server = addr, uuid = uuid)
else:
node = StorageNode(server = addr, uuid = uuid)
app.nm.add(node)
app.broadcastNodeInformation(node)
else:
# Otherwise, I know it only by the server address or the same server
# address but with a different UUID.
if node.getUUID() is None:
# This must be a master node.
if node.getNodeType() != MASTER_NODE_TYPE or node_type != MASTER_NODE_TYPE:
# Error. This node uses the same server address as a master
# node.
raise protocol.ProtocolError('invalid server address')
node.setUUID(uuid)
if node.getState() != RUNNING_STATE:
node.setState(RUNNING_STATE)
app.broadcastNodeInformation(node)
else:
# This node has a different UUID.
if node.getState() == RUNNING_STATE:
# If it is still running, reject this node.
raise protocol.ProtocolError('invalid server address')
# Otherwise, forget the old one.
node.setState(BROKEN_STATE)
app.broadcastNodeInformation(node)
# And insert a new one.
node.setUUID(uuid)
node.setState(RUNNING_STATE)
app.broadcastNodeInformation(node)
else:
# I know this node by the UUID.
if node.getServer() != addr:
# This node has a different server address.
if node.getState() == RUNNING_STATE:
# If it is still running, reject this node.
raise protocol.ProtocolError('invalid server address')
# Otherwise, forget the old one.
node.setState(BROKEN_STATE)
app.broadcastNodeInformation(node)
# And insert a new one.
node.setServer(addr)
node.setState(RUNNING_STATE)
app.broadcastNodeInformation(node)
else:
# If this node is broken, reject it. Otherwise, assume that it is
# working again.
if node.getState() == BROKEN_STATE:
raise protocol.BrokenNodeDisallowedError
node.setUUID(uuid)
node.setState(RUNNING_STATE)
app.broadcastNodeInformation(node)
conn.setUUID(uuid)
self.acceptNodeIdentification(conn, packet, uuid)
if node_type is STORAGE_NODE_TYPE:
# ask the last IDs.
conn.ask(protocol.askLastIDs())
@decorators.identification_required
def handleAnnouncePrimaryMaster(self, conn, packet):
uuid = conn.getUUID()
# I am also the primary... So restart the election.
raise ElectionFailure, 'another primary arises'
def handleReelectPrimaryMaster(self, conn, packet):
raise ElectionFailure, 'reelection requested'
node = app.nm.getNodeByUUID(uuid)
if node.getState() != BROKEN_STATE:
node.setState(BROKEN_STATE)
app.broadcastNodeInformation(node)
MasterEventHandler.peerBroken(self, conn)
@decorators.identification_required
def handleNotifyNodeInformation(self, conn, packet, node_list):
uuid = conn.getUUID()
app = self.app
for node_type, ip_address, port, uuid, state in node_list:
if node_type in (CLIENT_NODE_TYPE, ADMIN_NODE_TYPE):
......@@ -223,14 +104,12 @@ class RecoveryEventHandler(MasterEventHandler):
# Something wrong happened possibly. Cut the connection to this node,
# if any, and notify the information to others.
# XXX this can be very slow.
for c in app.em.getConnectionList():
if c.getUUID() == uuid:
c.close()
c = app.em.getConnectionByUUID(uuid)
if c is not None:
c.close()
node.setState(state)
app.broadcastNodeInformation(node)
@decorators.identification_required
@decorators.restrict_node_types(STORAGE_NODE_TYPE)
def handleAnswerLastIDs(self, conn, packet, loid, ltid, lptid):
uuid = conn.getUUID()
app = self.app
......@@ -251,8 +130,6 @@ class RecoveryEventHandler(MasterEventHandler):
elif app.pt.getID() == lptid and app.target_uuid is None:
app.target_uuid = uuid
@decorators.identification_required
@decorators.restrict_node_types(STORAGE_NODE_TYPE)
def handleAnswerPartitionTable(self, conn, packet, ptid, row_list):
uuid = conn.getUUID()
app = self.app
......
This diff is collapsed.
This diff is collapsed.
......@@ -314,8 +314,10 @@ INTERNAL_ERROR_CODE = 8
# Cluster states
cluster_states = Enum({
'BOOTING': 1,
'RUNNING': 2,
'STOPPING': 3,
'RECOVERING': 2,
'VERIFYING': 3,
'RUNNING': 4,
'STOPPING': 5,
})
VALID_CLUSTER_STATE_LIST = (BOOTING, RUNNING, STOPPING)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment