Commit d0d0c143 authored by Julien Muchembled

qa: fix tests to not loop forever when the master dies unexpectedly

parent e5fd0233
...@@ -32,8 +32,8 @@ from neo.lib.protocol import NodeTypes, Packets, \ ...@@ -32,8 +32,8 @@ from neo.lib.protocol import NodeTypes, Packets, \
from neo.lib.util import makeChecksum, dump from neo.lib.util import makeChecksum, dump
from neo.lib.locking import Empty, Lock from neo.lib.locking import Empty, Lock
from neo.lib.connection import MTClientConnection, ConnectionClosed from neo.lib.connection import MTClientConnection, ConnectionClosed
from .exception import NEOStorageError, NEOStorageCreationUndoneError from .exception import (NEOStorageError, NEOStorageCreationUndoneError,
from .exception import NEOStorageNotFoundError NEOStorageNotFoundError, NEOPrimaryMasterLost)
from .handlers import storage, master from .handlers import storage, master
from neo.lib.threaded_app import ThreadedApplication from neo.lib.threaded_app import ThreadedApplication
from .cache import ClientCache from .cache import ClientCache
...@@ -55,6 +55,11 @@ if SignalHandler: ...@@ -55,6 +55,11 @@ if SignalHandler:
class Application(ThreadedApplication): class Application(ThreadedApplication):
"""The client node application.""" """The client node application."""
# For tests only. Do not touch. We want tpc_finish to always recover when
# the transaction is really committed, no matter how long the master
# is unreachable.
max_reconnection_to_master = float('inf')
def __init__(self, master_nodes, name, compress=True, **kw): def __init__(self, master_nodes, name, compress=True, **kw):
super(Application, self).__init__(parseMasterList(master_nodes), super(Application, self).__init__(parseMasterList(master_nodes),
name, **kw) name, **kw)
...@@ -179,12 +184,13 @@ class Application(ThreadedApplication): ...@@ -179,12 +184,13 @@ class Application(ThreadedApplication):
logging.debug('connecting to primary master...') logging.debug('connecting to primary master...')
self.start() self.start()
index = -1 index = -1
fail_count = 0
ask = self._ask ask = self._ask
handler = self.primary_bootstrap_handler handler = self.primary_bootstrap_handler
while 1: while 1:
self.ignore_invalidations = True self.ignore_invalidations = True
# Get network connection to primary master # Get network connection to primary master
while 1: while fail_count < self.max_reconnection_to_master:
self.nm.reset() self.nm.reset()
if self.primary_master_node is not None: if self.primary_master_node is not None:
# If I know a primary master node, pinpoint it. # If I know a primary master node, pinpoint it.
...@@ -205,11 +211,15 @@ class Application(ThreadedApplication): ...@@ -205,11 +211,15 @@ class Application(ThreadedApplication):
try: try:
ask(conn, p, handler=handler) ask(conn, p, handler=handler)
except ConnectionClosed: except ConnectionClosed:
fail_count += 1
continue continue
# If we reached the primary master node, mark as connected # If we reached the primary master node, mark as connected
if self.primary_master_node is not None and \ if self.primary_master_node is not None and \
self.primary_master_node is self.trying_master_node: self.primary_master_node is self.trying_master_node:
break break
else:
raise NEOPrimaryMasterLost(
"Too many connection failures to the primary master")
logging.info('Connected to %s', self.primary_master_node) logging.info('Connected to %s', self.primary_master_node)
try: try:
# Request identification and required informations to be # Request identification and required informations to be
...@@ -223,6 +233,7 @@ class Application(ThreadedApplication): ...@@ -223,6 +233,7 @@ class Application(ThreadedApplication):
except ConnectionClosed: except ConnectionClosed:
logging.error('Connection to %s lost', self.trying_master_node) logging.error('Connection to %s lost', self.trying_master_node)
self.primary_master_node = None self.primary_master_node = None
fail_count += 1
logging.info("Connected and ready") logging.info("Connected and ready")
return conn return conn
......
...@@ -474,6 +474,7 @@ class NEOCluster(object): ...@@ -474,6 +474,7 @@ class NEOCluster(object):
master_nodes=master_nodes, master_nodes=master_nodes,
name=self.cluster_name, name=self.cluster_name,
**kw) **kw)
result.app.max_reconnection_to_master = 10
self.zodb_storage_list.append(result) self.zodb_storage_list.append(result)
return result return result
......
...@@ -410,6 +410,8 @@ class StorageApplication(ServerNode, neo.storage.app.Application): ...@@ -410,6 +410,8 @@ class StorageApplication(ServerNode, neo.storage.app.Application):
class ClientApplication(Node, neo.client.app.Application): class ClientApplication(Node, neo.client.app.Application):
max_reconnection_to_master = 10
def __init__(self, master_nodes, name, **kw): def __init__(self, master_nodes, name, **kw):
super(ClientApplication, self).__init__(master_nodes, name, **kw) super(ClientApplication, self).__init__(master_nodes, name, **kw)
self.poll_thread.node_name = name self.poll_thread.node_name = name
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment