Commit 142ad156 authored by Vincent Pelletier

Don't wait for unreachable nodes in election.

parent b1123667
...@@ -116,8 +116,6 @@ class Application(object): ...@@ -116,8 +116,6 @@ class Application(object):
def _run(self): def _run(self):
"""Make sure that the status is sane and start a loop.""" """Make sure that the status is sane and start a loop."""
bootstrap = True
# Make a listening port. # Make a listening port.
self.listening_conn = ListeningConnection(self.em, None, self.listening_conn = ListeningConnection(self.em, None,
addr=self.server, connector=self.connector_handler()) addr=self.server, connector=self.connector_handler())
...@@ -127,8 +125,7 @@ class Application(object): ...@@ -127,8 +125,7 @@ class Application(object):
# (Re)elect a new primary master. # (Re)elect a new primary master.
self.primary = not self.nm.getMasterList() self.primary = not self.nm.getMasterList()
if not self.primary: if not self.primary:
self.electPrimary(bootstrap=bootstrap) self.electPrimary()
bootstrap = False
try: try:
if self.primary: if self.primary:
self.playPrimaryRole() self.playPrimaryRole()
...@@ -141,7 +138,7 @@ class Application(object): ...@@ -141,7 +138,7 @@ class Application(object):
conn.close() conn.close()
def electPrimary(self, bootstrap = True): def electPrimary(self):
"""Elect a primary master node. """Elect a primary master node.
The difficulty is that a master node must accept connections from The difficulty is that a master node must accept connections from
...@@ -166,34 +163,13 @@ class Application(object): ...@@ -166,34 +163,13 @@ class Application(object):
self.primary = None self.primary = None
self.primary_master_node = None self.primary_master_node = None
try: try:
# Wait at most 20 seconds at bootstrap. Otherwise, wait at most
# 10 seconds to avoid stopping the whole cluster for a long time.
# Note that even if not all master are up in the first 20 seconds
# this is not an issue because the first up will timeout and take
# the primary role.
if bootstrap:
expiration = 20
else:
expiration = 10
t = 0
while (self.unconnected_master_node_set or while (self.unconnected_master_node_set or
self.negotiating_master_node_set): self.negotiating_master_node_set):
current_time = time() for addr in self.unconnected_master_node_set:
if current_time >= t:
t = current_time + 1
for node in self.nm.getMasterList():
if not node.isRunning() and node.getLastStateChange() + \
expiration < current_time:
neo.lib.logging.info('%s is down' % (node, ))
node.setDown()
self.unconnected_master_node_set.discard(
node.getAddress())
# Try to connect to master nodes.
for addr in self.unconnected_master_node_set.difference(
x.getAddress() for x in self.em.getClientList()):
ClientConnection(self.em, client_handler, addr=addr, ClientConnection(self.em, client_handler, addr=addr,
connector=self.connector_handler()) connector=self.connector_handler())
self.negotiating_master_node_set.add(addr)
self.unconnected_master_node_set.clear()
self.em.poll(1) self.em.poll(1)
except ElectionFailure, m: except ElectionFailure, m:
# something goes wrong, clean then restart # something goes wrong, clean then restart
...@@ -217,7 +193,6 @@ class Application(object): ...@@ -217,7 +193,6 @@ class Application(object):
# Close all connections. # Close all connections.
for conn in self.em.getClientList() + self.em.getServerList(): for conn in self.em.getClientList() + self.em.getServerList():
conn.close() conn.close()
bootstrap = False
else: else:
# election succeed, stop the process # election succeed, stop the process
self.primary = self.primary is None self.primary = self.primary is None
......
...@@ -32,13 +32,6 @@ class ClientElectionHandler(MasterHandler): ...@@ -32,13 +32,6 @@ class ClientElectionHandler(MasterHandler):
def askPrimary(self, conn): def askPrimary(self, conn):
raise UnexpectedPacketError, "askPrimary on server connection" raise UnexpectedPacketError, "askPrimary on server connection"
def connectionStarted(self, conn):
addr = conn.getAddress()
# connection in progress
self.app.unconnected_master_node_set.remove(addr)
self.app.negotiating_master_node_set.add(addr)
super(ClientElectionHandler, self).connectionStarted(conn)
def connectionFailed(self, conn): def connectionFailed(self, conn):
addr = conn.getAddress() addr = conn.getAddress()
node = self.app.nm.getByAddress(addr) node = self.app.nm.getByAddress(addr)
...@@ -47,7 +40,6 @@ class ClientElectionHandler(MasterHandler): ...@@ -47,7 +40,6 @@ class ClientElectionHandler(MasterHandler):
node.getState()) node.getState())
# connection never success, node is still in unknown state # connection never success, node is still in unknown state
self.app.negotiating_master_node_set.discard(addr) self.app.negotiating_master_node_set.discard(addr)
self.app.unconnected_master_node_set.add(addr)
super(ClientElectionHandler, self).connectionFailed(conn) super(ClientElectionHandler, self).connectionFailed(conn)
def connectionCompleted(self, conn): def connectionCompleted(self, conn):
...@@ -120,7 +112,6 @@ class ClientElectionHandler(MasterHandler): ...@@ -120,7 +112,6 @@ class ClientElectionHandler(MasterHandler):
app.primary_master_node = primary_node app.primary_master_node = primary_node
# Stop waiting for connections than primary master's to # Stop waiting for connections than primary master's to
# complete to exit election phase ASAP. # complete to exit election phase ASAP.
app.unconnected_master_node_set.clear()
app.negotiating_master_node_set.clear() app.negotiating_master_node_set.clear()
primary_node = app.primary_master_node primary_node = app.primary_master_node
...@@ -200,7 +191,6 @@ class ServerElectionHandler(MasterHandler): ...@@ -200,7 +191,6 @@ class ServerElectionHandler(MasterHandler):
node = app.nm.getByUUID(uuid) node = app.nm.getByUUID(uuid)
app.primary = False app.primary = False
app.primary_master_node = node app.primary_master_node = node
app.unconnected_master_node_set.clear()
app.negotiating_master_node_set.clear() app.negotiating_master_node_set.clear()
neo.lib.logging.info('%s is the primary', node) neo.lib.logging.info('%s is the primary', node)
...@@ -47,8 +47,6 @@ class MasterClientElectionTests(NeoUnitTestBase): ...@@ -47,8 +47,6 @@ class MasterClientElectionTests(NeoUnitTestBase):
self.election = ClientElectionHandler(self.app) self.election = ClientElectionHandler(self.app)
self.app.unconnected_master_node_set = set() self.app.unconnected_master_node_set = set()
self.app.negotiating_master_node_set = set() self.app.negotiating_master_node_set = set()
for node in self.app.nm.getMasterList():
self.app.unconnected_master_node_set.add(node.getAddress())
# define some variable to simulate client and storage node # define some variable to simulate client and storage node
self.storage_port = 10021 self.storage_port = 10021
self.master_port = 10011 self.master_port = 10011
...@@ -70,22 +68,8 @@ class MasterClientElectionTests(NeoUnitTestBase): ...@@ -70,22 +68,8 @@ class MasterClientElectionTests(NeoUnitTestBase):
def _checkUnconnected(self, node): def _checkUnconnected(self, node):
addr = node.getAddress() addr = node.getAddress()
self.assertTrue(addr in self.app.unconnected_master_node_set)
self.assertFalse(addr in self.app.negotiating_master_node_set) self.assertFalse(addr in self.app.negotiating_master_node_set)
def _checkNegociating(self, node):
addr = node.getAddress()
self.assertTrue(addr in self.app.negotiating_master_node_set)
self.assertFalse(addr in self.app.unconnected_master_node_set)
def test_connectionStarted(self):
node, conn = self.identifyToMasterNode()
self.assertTrue(node.isUnknown())
self._checkUnconnected(node)
self.election.connectionStarted(conn)
self.assertTrue(node.isUnknown())
self._checkNegociating(node)
def test_connectionFailed(self): def test_connectionFailed(self):
node, conn = self.identifyToMasterNode() node, conn = self.identifyToMasterNode()
self.assertTrue(node.isUnknown()) self.assertTrue(node.isUnknown())
...@@ -107,7 +91,6 @@ class MasterClientElectionTests(NeoUnitTestBase): ...@@ -107,7 +91,6 @@ class MasterClientElectionTests(NeoUnitTestBase):
self._checkUnconnected(node) self._checkUnconnected(node)
addr = node.getAddress() addr = node.getAddress()
self.app.negotiating_master_node_set.add(addr) self.app.negotiating_master_node_set.add(addr)
self.app.unconnected_master_node_set.discard(addr)
def test_connectionClosed(self): def test_connectionClosed(self):
node, conn = self.identifyToMasterNode() node, conn = self.identifyToMasterNode()
...@@ -115,7 +98,6 @@ class MasterClientElectionTests(NeoUnitTestBase): ...@@ -115,7 +98,6 @@ class MasterClientElectionTests(NeoUnitTestBase):
self.election.connectionClosed(conn) self.election.connectionClosed(conn)
self.assertTrue(node.isUnknown()) self.assertTrue(node.isUnknown())
addr = node.getAddress() addr = node.getAddress()
self.assertFalse(addr in self.app.unconnected_master_node_set)
self.assertFalse(addr in self.app.negotiating_master_node_set) self.assertFalse(addr in self.app.negotiating_master_node_set)
def test_acceptIdentification1(self): def test_acceptIdentification1(self):
...@@ -124,7 +106,6 @@ class MasterClientElectionTests(NeoUnitTestBase): ...@@ -124,7 +106,6 @@ class MasterClientElectionTests(NeoUnitTestBase):
args = (node.getUUID(), 0, 10, self.app.uuid) args = (node.getUUID(), 0, 10, self.app.uuid)
self.election.acceptIdentification(conn, self.election.acceptIdentification(conn,
NodeTypes.CLIENT, *args) NodeTypes.CLIENT, *args)
self.assertFalse(node in self.app.unconnected_master_node_set)
self.assertFalse(node in self.app.negotiating_master_node_set) self.assertFalse(node in self.app.negotiating_master_node_set)
self.checkClosed(conn) self.checkClosed(conn)
...@@ -173,7 +154,6 @@ class MasterClientElectionTests(NeoUnitTestBase): ...@@ -173,7 +154,6 @@ class MasterClientElectionTests(NeoUnitTestBase):
node, conn = self.identifyToMasterNode() node, conn = self.identifyToMasterNode()
master_list = self._getMasterList() master_list = self._getMasterList()
self.election.answerPrimary(conn, node.getUUID(), master_list) self.election.answerPrimary(conn, node.getUUID(), master_list)
self.assertEqual(len(self.app.unconnected_master_node_set), 0)
self.assertEqual(len(self.app.negotiating_master_node_set), 0) self.assertEqual(len(self.app.negotiating_master_node_set), 0)
self.assertFalse(self.app.primary) self.assertFalse(self.app.primary)
self.assertEqual(self.app.primary_master_node, node) self.assertEqual(self.app.primary_master_node, node)
...@@ -194,7 +174,6 @@ class MasterServerElectionTests(NeoUnitTestBase): ...@@ -194,7 +174,6 @@ class MasterServerElectionTests(NeoUnitTestBase):
self.app.unconnected_master_node_set = set() self.app.unconnected_master_node_set = set()
self.app.negotiating_master_node_set = set() self.app.negotiating_master_node_set = set()
for node in self.app.nm.getMasterList(): for node in self.app.nm.getMasterList():
self.app.unconnected_master_node_set.add(node.getAddress())
node.setState(NodeStates.RUNNING) node.setState(NodeStates.RUNNING)
# define some variable to simulate client and storage node # define some variable to simulate client and storage node
self.client_address = (self.local_ip, 1000) self.client_address = (self.local_ip, 1000)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment