Commit f627727f authored by Vincent Pelletier's avatar Vincent Pelletier

Reorder connection initialisation packets for master connection:

- first, ask which master is the primary
- then, once connected to the primary, request node identification
- then, ask explicitly for the partition table and node information, to follow the query/answer scheme.
Details:
- Master:
 - Move handleAskPrimaryMaster, handleAskNodeInformation and handleAskPartitionTable implementations to base handler to factorise code
 - handleRequestNodeIdentification is not required any more in secondary mode, only handleAskPrimaryMaster
- Client:
 - Make the bootstrap handler and master connection behave more like their storage-node equivalents
 - Remove PrimaryHandler (moved into PrimaryNotificationsHandler)
 - Parse the configured master node IP list just once, and register the nodes with the node manager just once (in app's __init__)


git-svn-id: https://svn.erp5.org/repos/neo/branches/prototype3@681 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent 3a48be31
......@@ -162,9 +162,8 @@ class MonitoringEventHandler(BaseEventHandler):
# Should not happen.
raise RuntimeError('connection completed while not trying to connect')
p = protocol.requestNodeIdentification(ADMIN_NODE_TYPE,
app.uuid, app.server[0], app.server[1], app.name)
conn.ask(p)
# Ask a primary master.
conn.ask(protocol.askPrimaryMaster())
EventHandler.connectionCompleted(self, conn)
def connectionFailed(self, conn):
......@@ -275,8 +274,8 @@ class MonitoringEventHandler(BaseEventHandler):
# got an uuid from the primary master
app.uuid = your_uuid
# Ask a primary master.
conn.ask(protocol.askPrimaryMaster())
conn.ask(protocol.askNodeInformation())
conn.ask(protocol.askPartitionTable([]))
def handleAnswerPrimaryMaster(self, conn, packet, primary_uuid,
known_master_list):
......@@ -322,6 +321,9 @@ class MonitoringEventHandler(BaseEventHandler):
app.trying_master_node = None
conn.close()
p = protocol.requestNodeIdentification(ADMIN_NODE_TYPE,
app.uuid, app.server[0], app.server[1], app.name)
conn.ask(p)
@decorators.identification_required
def handleSendPartitionTable(self, conn, packet, ptid, row_list):
......@@ -349,6 +351,10 @@ class MonitoringEventHandler(BaseEventHandler):
pt.log()
# Stub handler for an answerPartitionTable packet: it only logs a warning
# carrying the handler name; ptid and row_list are not used.
@decorators.identification_required
def handleAnswerPartitionTable(self, conn, packet, ptid, row_list):
logging.warning("handleAnswerPartitionTable")
@decorators.identification_required
def handleNotifyPartitionChanges(self, conn, packet, ptid, cell_list):
logging.warning("handleNotifyPartitionChanges")
......@@ -442,6 +448,10 @@ class MonitoringEventHandler(BaseEventHandler):
self.app.notified = True
# Stub handler for an answerNodeInformation packet: it only logs the handler
# name at info level; node_list is not used.
@decorators.identification_required
def handleAnswerNodeInformation(self, conn, packet, node_list):
logging.info("handleAnswerNodeInformation")
def handleAnswerClusterState(self, conn, packet, state):
self.app.cluster_state = state
......
......@@ -231,7 +231,21 @@ class Application(object):
self.pt = None
self.master_conn = None
self.primary_master_node = None
self.master_node_list = master_nodes.split(' ')
self.trying_master_node = None
# XXX: this code duplicates neo.config.ConfigurationManager.getMasterNodeList
self.master_node_list = master_node_list = []
for node in master_nodes.split():
if not node:
continue
if ':' in node:
ip_address, port = node.split(':')
port = int(port)
else:
ip_address = node
port = 10100 # XXX: default_master_port
server = (ip_address, port)
master_node_list.append(server)
self.nm.add(MasterNode(server=server))
# no self-assigned UUID, primary master will supply us one
self.uuid = INVALID_UUID
self.mq_cache = MQ()
......@@ -239,6 +253,7 @@ class Application(object):
self.ptid = INVALID_PTID
self.storage_handler = StorageAnswersHandler(self, self.dispatcher)
self.primary_handler = PrimaryAnswersHandler(self, self.dispatcher)
self.primary_bootstrap_handler = PrimaryBootstrapHandler(self, self.dispatcher)
self.notifications_handler = PrimaryNotificationsHandler(self, self.dispatcher)
# Internal attribute distinct between thread
self.local_var = ThreadContext()
......@@ -352,70 +367,72 @@ class Application(object):
def _connectToPrimaryMasterNode(self):
logging.debug('connecting to primary master...')
master_index = 0
# Make application execute remaining message if any
self._waitMessage()
while True:
self.setNodeReady()
if self.primary_master_node in (None, -1):
# Try with master node defined in config
ready = False
nm = self.nm
while not ready:
# Get network connection to primary master
index = 0
connected = False
while not connected:
if self.primary_master_node is not None:
# If I know a primary master node, pinpoint it.
self.trying_master_node = self.primary_master_node
else:
# Otherwise, check one by one.
master_list = nm.getMasterNodeList()
try:
self.trying_master_node = master_list[index]
except IndexError:
index = 0
self.trying_master_node = master_list[0]
index += 1
# Connect to master
conn = MTClientConnection(self.em, self.notifications_handler,
addr=self.trying_master_node.getServer(),
connector_handler=self.connector_handler)
# Query for primary master node
conn.lock()
try:
addr, port = self.master_node_list[master_index].split(':')
except IndexError:
master_index = 0
addr, port = self.master_node_list[master_index].split(':')
port = int(port)
else:
addr, port = self.primary_master_node.getServer()
# Request Node Identification
handler = PrimaryBootstrapHandler(self, self.dispatcher)
conn = MTClientConnection(self.em, handler, (addr, port),
connector_handler=self.connector_handler)
self._nm_acquire()
try:
if self.nm.getNodeByServer((addr, port)) is None:
n = MasterNode(server = (addr, port))
self.nm.add(n)
finally:
self._nm_release()
msg_id = conn.ask(protocol.askPrimaryMaster())
self.dispatcher.register(conn, msg_id, self.local_var.queue)
finally:
conn.unlock()
self._waitMessage(conn, msg_id, handler=self.primary_bootstrap_handler)
# If we reached the primary master node, mark as connected
connected = self.primary_master_node is not None \
and self.primary_master_node is self.trying_master_node
# Identify to primary master and request initial data
conn.lock()
try:
p = protocol.requestNodeIdentification(CLIENT_NODE_TYPE,
p = protocol.requestNodeIdentification(CLIENT_NODE_TYPE,
self.uuid, '0.0.0.0', 0, self.name)
msg_id = conn.ask(p)
self.dispatcher.register(conn, msg_id, self.local_var.queue)
finally:
conn.unlock()
# Wait for answer
while 1:
self._waitMessage(handler=handler)
# Now check result
if self.primary_master_node is not None:
if self.primary_master_node == -1:
# Connection failed, try with another master node
self.primary_master_node = None
master_index += 1
break
elif self.primary_master_node.getServer() != (addr, port):
# Master node changed, connect to new one
break
elif not self.isNodeReady():
# Wait a bit and reask again
break
elif self.pt is not None and self.pt.operational():
# Connected to primary master node
break
sleep(0.1)
if self.pt is not None and self.pt.operational() \
and self.uuid != INVALID_UUID:
# Connected to primary master node and got all informations
break
sleep(1)
self._waitMessage(conn, msg_id, handler=self.primary_bootstrap_handler)
if self.uuid != INVALID_UUID:
# TODO: pipeline those 2 requests
# This is currently impossible because _waitMessage can only
# wait on one message at a time
conn.lock()
try:
msg_id = conn.ask(protocol.askPartitionTable([]))
self.dispatcher.register(conn, msg_id, self.local_var.queue)
finally:
conn.unlock()
self._waitMessage(conn, msg_id, handler=self.primary_bootstrap_handler)
conn.lock()
try:
msg_id = conn.ask(protocol.askNodeInformation())
self.dispatcher.register(conn, msg_id, self.local_var.queue)
finally:
conn.unlock()
self._waitMessage(conn, msg_id, handler=self.primary_bootstrap_handler)
ready = self.uuid != INVALID_UUID and self.pt is not None \
and self.pt.operational()
logging.info("connected to primary master node %s" % self.primary_master_node)
conn.setHandler(PrimaryNotificationsHandler(self, self.dispatcher))
return conn
def registerDB(self, db, limit):
......
This diff is collapsed.
......@@ -39,11 +39,7 @@ class ElectionEventHandler(MasterEventHandler):
MasterEventHandler.connectionStarted(self, conn)
def connectionCompleted(self, conn):
app = self.app
# Request a node identification.
p = protocol.requestNodeIdentification(MASTER_NODE_TYPE, app.uuid,
app.server[0], app.server[1], app.name)
conn.ask(p)
conn.ask(protocol.askPrimaryMaster())
MasterEventHandler.connectionCompleted(self, conn)
def connectionFailed(self, conn):
......@@ -119,8 +115,11 @@ class ElectionEventHandler(MasterEventHandler):
conn.setUUID(uuid)
node.setUUID(uuid)
# Ask a primary master.
conn.ask(protocol.askPrimaryMaster())
if app.uuid < uuid:
# I lost.
app.primary = False
app.negotiating_master_node_set.discard(conn.getAddress())
@decorators.client_connection_required
def handleAnswerPrimaryMaster(self, conn, packet, primary_uuid, known_master_list):
......@@ -161,12 +160,10 @@ class ElectionEventHandler(MasterEventHandler):
# Whatever the situation is, I trust this master.
app.primary = False
app.primary_master_node = primary_node
else:
if app.uuid < conn.getUUID():
# I lost.
app.primary = False
app.negotiating_master_node_set.discard(conn.getAddress())
# Request a node identification.
conn.ask(protocol.requestNodeIdentification(MASTER_NODE_TYPE,
app.uuid, app.server[0], app.server[1], app.name))
@decorators.server_connection_required
def handleRequestNodeIdentification(self, conn, packet, node_type,
......@@ -198,28 +195,6 @@ class ElectionEventHandler(MasterEventHandler):
p = protocol.acceptNodeIdentification(MASTER_NODE_TYPE, app.uuid,
app.server[0], app.server[1], app.pt.getPartitions(),
app.pt.getReplicas(), uuid)
# Next, the peer should ask a primary master node.
conn.answer(p, packet)
@decorators.identification_required
@decorators.server_connection_required
def handleAskPrimaryMaster(self, conn, packet):
    """Answer an askPrimaryMaster request during the election.

    The reply carries the primary master UUID as known by this node
    (its own UUID when app.primary is set, the UUID of
    app.primary_master_node when one is recorded, INVALID_UUID
    otherwise) and one entry per master node known to the node manager
    which is not in BROKEN_STATE.
    """
    # NOTE(review): the peer UUID is fetched but never used below.
    uuid = conn.getUUID()
    app = self.app
    if app.primary:
        primary_uuid = app.uuid
    else:
        primary_node = app.primary_master_node
        if primary_node is None:
            primary_uuid = INVALID_UUID
        else:
            primary_uuid = primary_node.getUUID()
    # Each entry is the node's server tuple extended with its UUID
    # (INVALID_UUID when the node has none).
    known_master_list = [
        node.getServer() + (node.getUUID() or INVALID_UUID,)
        for node in app.nm.getMasterNodeList()
        if node.getState() != BROKEN_STATE]
    conn.answer(
        protocol.answerPrimaryMaster(primary_uuid, known_master_list),
        packet)
@decorators.identification_required
......
......@@ -18,6 +18,8 @@
import logging
from neo.handler import EventHandler
from neo.protocol import INVALID_UUID, BROKEN_STATE, ADMIN_NODE_TYPE
from neo import protocol
class MasterEventHandler(EventHandler):
"""This class implements a generic part of the event handlers."""
......@@ -29,9 +31,6 @@ class MasterEventHandler(EventHandler):
uuid, ip_address, port, name):
raise NotImplementedError('this method must be overridden')
def handleAskPrimaryMaster(self, conn, packet):
raise NotImplementedError('this method must be overridden')
def handleAnnouncePrimaryMaster(self, conn, packet):
raise NotImplementedError('this method must be overridden')
......@@ -85,3 +84,32 @@ class MasterEventHandler(EventHandler):
def handleNotifyPartitionChanges(self, conn, packet, ptid, cell_list):
logging.error('ignoring notify partition changes in %s' % self.__class__.__name__)
def handleAskPrimaryMaster(self, conn, packet):
    """Answer an askPrimaryMaster request.

    The reply carries the primary master UUID as known by this node
    (its own UUID when app.primary is set, the UUID of
    app.primary_master_node when one is recorded, INVALID_UUID
    otherwise) and the list of known masters: this node first, then
    every master registered in the node manager which is not in
    BROKEN_STATE.
    """
    app = self.app
    if app.primary:
        primary_uuid = app.uuid
    else:
        primary_node = app.primary_master_node
        if primary_node is None:
            primary_uuid = INVALID_UUID
        else:
            primary_uuid = primary_node.getUUID()
    # Each entry is a server tuple extended with the node UUID
    # (INVALID_UUID when the node has none); this node is listed first.
    known_master_list = [app.server + (app.uuid, )]
    known_master_list.extend(
        node.getServer() + (node.getUUID() or INVALID_UUID, )
        for node in app.nm.getMasterNodeList()
        if node.getState() != BROKEN_STATE)
    answer = protocol.answerPrimaryMaster(primary_uuid, known_master_list)
    conn.answer(answer, packet)
def handleAskNodeInformation(self, conn, packet):
    """Answer an askNodeInformation request.

    Node information is pushed on the connection through
    app.sendNodesInformations(); the answer packet itself is then sent
    with an empty node list.
    """
    app = self.app
    app.sendNodesInformations(conn)
    reply = protocol.answerNodeInformation([])
    conn.answer(reply, packet)
def handleAskPartitionTable(self, conn, packet, offset_list):
    """Answer an askPartitionTable request.

    Only requests with an empty offset_list are accepted (enforced by
    an assertion). The table is pushed on the connection through
    app.sendPartitionTable(); the answer packet then carries the
    partition table ID and an empty row list.
    """
    assert len(offset_list) == 0
    app = self.app
    app.sendPartitionTable(conn)
    reply = protocol.answerPartitionTable(app.pt.getID(), [])
    conn.answer(reply, packet)
......@@ -163,32 +163,11 @@ class RecoveryEventHandler(MasterEventHandler):
p = protocol.acceptNodeIdentification(MASTER_NODE_TYPE,
app.uuid, app.server[0], app.server[1],
app.pt.getPartitions(), app.pt.getReplicas(), uuid)
# Next, the peer should ask a primary master node.
conn.answer(p, packet)
@decorators.identification_required
def handleAskPrimaryMaster(self, conn, packet):
uuid = conn.getUUID()
app = self.app
# Merely tell the peer that I am the primary master node.
# It is not necessary to send known master nodes, because
# I must send all node information immediately.
p = protocol.answerPrimaryMaster(app.uuid, [])
conn.answer(p, packet)
# Send the information.
app.sendNodesInformations(conn)
# If this is a storage node, ask the last IDs.
node = app.nm.getNodeByUUID(uuid)
if node.getNodeType() == STORAGE_NODE_TYPE:
if node_type is STORAGE_NODE_TYPE:
# ask the last IDs.
conn.ask(protocol.askLastIDs())
elif node.getNodeType() == ADMIN_NODE_TYPE and app.pt.getID() != INVALID_PTID:
# send partition table if exists
logging.info('sending partition table %s to %s' %
(dump(app.pt.getID()), conn.getAddress()))
app.sendPartitionTable(conn)
@decorators.identification_required
def handleAnnouncePrimaryMaster(self, conn, packet):
......
......@@ -56,48 +56,6 @@ class SecondaryEventHandler(MasterEventHandler):
node.setState(RUNNING_STATE)
MasterEventHandler.packetReceived(self, conn, packet)
# Handle a requestNodeIdentification packet in the secondary master handler.
# Checks the cluster name, records the peer in the node manager when it is a
# master node announcing a valid UUID, and answers with an
# acceptNodeIdentification packet describing this master.
# NOTE(review): indentation was lost in this view, so the exact nesting of
# the setUUID calls below cannot be confirmed from here.
@decorators.server_connection_required
def handleRequestNodeIdentification(self, conn, packet, node_type,
uuid, ip_address, port, name):
# Presumably rejects a peer from another cluster -- TODO confirm.
self.checkClusterName(name)
app = self.app
# Add a node only if it is a master node and I do not know it yet.
if node_type == MASTER_NODE_TYPE and uuid != INVALID_UUID:
addr = (ip_address, port)
node = app.nm.getNodeByServer(addr)
if node is None:
node = MasterNode(server = addr, uuid = uuid)
app.nm.add(node)
# Trust the UUID sent by the peer.
node.setUUID(uuid)
conn.setUUID(uuid)
# The answer carries this master's type, UUID, address and port, the
# partition and replica counts, and the peer's UUID.
p = protocol.acceptNodeIdentification(MASTER_NODE_TYPE,
app.uuid, app.server[0], app.server[1],
app.pt.getPartitions(), app.pt.getReplicas(),
uuid)
# Next, the peer should ask a primary master node.
conn.answer(p, packet)
@decorators.identification_required
@decorators.server_connection_required
def handleAskPrimaryMaster(self, conn, packet):
    """Answer an askPrimaryMaster request while being a secondary master.

    The reply carries the UUID of app.primary_master_node and one entry
    per master node known to the node manager which is not in
    BROKEN_STATE.
    """
    # NOTE(review): the peer UUID is fetched but never used below.
    uuid = conn.getUUID()
    app = self.app
    primary_uuid = app.primary_master_node.getUUID()
    # Each entry is the node's server tuple extended with its UUID
    # (INVALID_UUID when the node has none).
    known_master_list = [
        node.getServer() + (node.getUUID() or INVALID_UUID,)
        for node in app.nm.getMasterNodeList()
        if node.getState() != BROKEN_STATE]
    conn.answer(
        protocol.answerPrimaryMaster(primary_uuid, known_master_list),
        packet)
def handleAnnouncePrimaryMaster(self, conn, packet):
raise UnexpectedPacketError
......
......@@ -271,30 +271,6 @@ class ServiceEventHandler(MasterEventHandler):
# Next, the peer should ask a primary master node.
conn.answer(p, packet)
@decorators.identification_required
def handleAskPrimaryMaster(self, conn, packet):
    """Answer an askPrimaryMaster request while in service.

    This node announces itself as the primary master with an empty
    known-master list, then pushes node information on the connection.
    Storage, client and admin peers also receive the partition table,
    and a storage peer which is not in PENDING_STATE is notified to
    start operation.
    """
    app = self.app
    peer_uuid = conn.getUUID()
    # No master list is needed in the answer: all node information is
    # sent right after it.
    answer = protocol.answerPrimaryMaster(app.uuid, [])
    conn.answer(answer, packet)
    logging.info('sending notify node information to %s:%d', *(conn.getAddress()))
    app.sendNodesInformations(conn)

    peer = app.nm.getNodeByUUID(peer_uuid)
    table_recipients = (STORAGE_NODE_TYPE, CLIENT_NODE_TYPE, ADMIN_NODE_TYPE)
    if peer.getNodeType() in table_recipients:
        logging.info('sending partition table to %s:%d', *(conn.getAddress()))
        app.sendPartitionTable(conn)

    is_storage = peer.getNodeType() == STORAGE_NODE_TYPE
    if is_storage and peer.getState() != PENDING_STATE:
        # A non-pending storage node is asked to start.
        conn.notify(protocol.startOperation())
@decorators.identification_required
def handleAnnouncePrimaryMaster(self, conn, packet):
# I am also the primary... So restart the election.
......
......@@ -189,24 +189,6 @@ class VerificationEventHandler(MasterEventHandler):
# Next, the peer should ask a primary master node.
conn.answer(p, packet)
@decorators.identification_required
def handleAskPrimaryMaster(self, conn, packet):
    """Answer an askPrimaryMaster request during verification.

    This node announces itself as the primary master with an empty
    known-master list, then pushes node information on the connection.
    Storage and admin peers also receive the partition table.
    """
    app = self.app
    peer_uuid = conn.getUUID()
    # No master list is needed in the answer: all node information is
    # sent right after it.
    answer = protocol.answerPrimaryMaster(app.uuid, [])
    conn.answer(answer, packet)
    app.sendNodesInformations(conn)
    peer_type = app.nm.getNodeByUUID(peer_uuid).getNodeType()
    if peer_type in (STORAGE_NODE_TYPE, ADMIN_NODE_TYPE):
        app.sendPartitionTable(conn)
@decorators.identification_required
def handleAnnouncePrimaryMaster(self, conn, packet):
uuid = conn.getUUID()
......
......@@ -192,7 +192,27 @@ class Application(object):
t = 0
while 1:
em.poll(1)
if self.primary_master_node is not None:
if self.trying_master_node is None:
if t + 1 < time():
# Choose a master node to connect to.
if self.primary_master_node is not None:
# If I know a primary master node, pinpoint it.
self.trying_master_node = self.primary_master_node
else:
# Otherwise, check one by one.
master_list = nm.getMasterNodeList()
try:
self.trying_master_node = master_list[index]
except IndexError:
index = 0
self.trying_master_node = master_list[0]
index += 1
ClientConnection(em, handler, \
addr = self.trying_master_node.getServer(),
connector_handler = self.connector_handler)
t = time()
elif self.primary_master_node is self.trying_master_node:
# If I know which is a primary master node, check if
# I have a connection to it already.
for conn in em.getConnectionList():
......@@ -204,26 +224,6 @@ class Application(object):
# Yes, I have.
return
if self.trying_master_node is None and t + 1 < time():
# Choose a master node to connect to.
if self.primary_master_node is not None:
# If I know a primary master node, pinpoint it.
self.trying_master_node = self.primary_master_node
else:
# Otherwise, check one by one.
master_list = nm.getMasterNodeList()
try:
self.trying_master_node = master_list[index]
except IndexError:
index = 0
self.trying_master_node = master_list[0]
index += 1
ClientConnection(em, handler, \
addr = self.trying_master_node.getServer(),
connector_handler = self.connector_handler)
t = time()
def verifyData(self):
"""Verify data under the control by a primary master node.
Connections from client nodes may not be accepted at this stage."""
......
......@@ -38,9 +38,7 @@ class BootstrapEventHandler(StorageEventHandler):
# Should not happen.
raise RuntimeError('connection completed while not trying to connect')
p = protocol.requestNodeIdentification(STORAGE_NODE_TYPE, app.uuid,
app.server[0], app.server[1], app.name)
conn.ask(p)
conn.ask(protocol.askPrimaryMaster())
StorageEventHandler.connectionCompleted(self, conn)
def connectionFailed(self, conn):
......@@ -179,10 +177,13 @@ class BootstrapEventHandler(StorageEventHandler):
logging.info('Got a new UUID from master : %s' % dump(app.uuid))
conn.setUUID(uuid)
node.setUUID(uuid)
#node.setUUID(uuid)
# Node UUID was set in handleAnswerPrimaryMaster
assert node.getUUID() == uuid
# Ask a primary master.
conn.ask(protocol.askPrimaryMaster())
# XXX: change handler for next packet (which might be handled in poll before it returns)
# This should be removed when we will handle our own pending packet queue.
conn.setHandler(VerificationEventHandler(app))
@decorators.client_connection_required
def handleAnswerPrimaryMaster(self, conn, packet, primary_uuid,
......@@ -213,10 +214,6 @@ class BootstrapEventHandler(StorageEventHandler):
if app.trying_master_node is primary_node:
# I am connected to the right one.
logging.info('connected to a primary master node')
# This is a workaround to prevent handling of
# packets for the verification phase.
handler = VerificationEventHandler(app)
conn.setHandler(handler)
else:
app.trying_master_node = None
conn.close()
......@@ -228,6 +225,9 @@ class BootstrapEventHandler(StorageEventHandler):
app.trying_master_node = None
conn.close()
p = protocol.requestNodeIdentification(STORAGE_NODE_TYPE, app.uuid,
app.server[0], app.server[1], app.name)
conn.ask(p)
def handleAskLastIDs(self, conn, packet):
logging.warning('/!\ handleAskLastIDs')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment