Commit 11d83ad9 authored by Julien Muchembled

client: fix conflict of node id by never reading from storage without being connected to the master

Client nodes ignored the state of the connection to the master node when reading
data from storage, as long as their partition tables were recent enough. This
way, they were able to finish read-only transactions even if they couldn't reach
the master, which could be useful for high availability. The downside is that
the master node ignored that their node ids were still in use, which caused
"uuid" conflicts when reallocating them.

Rejected solutions:
- An unused NEO Storage should not insist on staying connected to the master node.
- Reverting to big random node identifiers is a lot of work and it would make
  debugging annoying (see commit 23fad3af).
- Always increasing node ids could have been a simple solution if we accepted
  that the cluster dies once all 2^24 possible ids have been allocated.

Given that reading from storage without being connected to the master can only
be useful to finish the current transaction (because we always ping the master
at the beginning of every transaction), keeping such a feature is not worth the
effort.

This commit fixes id conflicts in a very simple way, by clearing the partition
table upon primary node failure, which forces reconnection to the master before
querying any storage node. In such a case, we raise a special exception that will
cause the transaction to be restarted, so that the user does not get errors for
temporary connection failures.
parent 4e17456b
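
Because NEOPrimaryMasterLost derives from ZODB's ReadConflictError, which is
already treated as a transient error, applications need no NEO-specific
handling: an ordinary conflict-retry loop rides out a temporary master
failure. Below is a minimal sketch of that pattern against a standard ZODB
`DB` object; the `run_with_retry` helper is illustrative and not part of this
commit.

    import transaction
    from ZODB.POSException import ConflictError

    def run_with_retry(db, work, attempts=3):
        # NEOPrimaryMasterLost subclasses ReadConflictError (itself a
        # ConflictError), so losing the primary master is handled like
        # any other conflict: abort, let the client reconnect, replay.
        conn = db.open()
        try:
            for i in range(attempts):
                try:
                    work(conn.root())
                    transaction.commit()
                    return
                except ConflictError:
                    transaction.abort()
                    if i == attempts - 1:
                        raise
        finally:
            conn.close()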
@@ -16,27 +16,6 @@ Although this should happen rarely enough not to affect performance, this can
 be an issue if your application can't afford restarting the transaction,
 e.g. because it interacted with external environment.
 
-(Z) Storage nodes rejecting clients after other clients got disconnected from the master
-----------------------------------------------------------------------------------------
-
-Client nodes ignore the state of the connection to the master node when reading
-data from storage, as long as their partition tables are recent enough. This
-way, they are able to finish read-only transactions even if they can't reach
-the master, which may be useful for high availability. The downside is that the
-master node ignores their node ids are still used, which causes "uuid"
-conflicts when reallocating them.
-
-Since an unused NEO Storage should not insist in staying connected to master
-node, solutions are:
-- change the way node ids are generated (either random or always increasing),
-- or make storage nodes reject clients that aren't known by the master, but
-  this drops the ability to perform read-only operations as described above
-  (a client that is not connected to the master would be forced to reconnect
-  before any query to the storage).
-
-Workaround: restart clients that aren't connected to the master
-
 (N) Storage failure or update may lead to POSException or break undoLog()
 -------------------------------------------------------------------------
...
@@ -27,7 +27,6 @@ class NEOStorageDoesNotExistError(NEOStorageNotFoundError):
     This error is a refinement of NEOStorageNotFoundError: this means
     that some object was not found, but also that it does not exist at all.
     """
-    pass
 
 class NEOStorageCreationUndoneError(NEOStorageDoesNotExistError):
     """
@@ -35,3 +34,13 @@ class NEOStorageCreationUndoneError(NEOStorageDoesNotExistError):
     some object existed at some point, but its creation was undone.
     """
 
+# TODO: Inherit from transaction.interfaces.TransientError
+#       (not recognized yet by ERP5 as a transient error).
+class NEOPrimaryMasterLost(POSException.ReadConflictError):
+    """
+    A read operation to a storage node failed because we get disconnected from
+    the primary master (which causes all connections to storages to be closed
+    as well). The connection failure to the master may be temporary so we'd
+    like to restart the transaction; if it isn't, we'll get failures when
+    trying to reconnect.
+    """
@@ -149,6 +149,10 @@ class PrimaryNotificationsHandler(MTEventHandler):
             app.master_conn = None
             for txn_context in app.txn_contexts():
                 txn_context['error'] = msg
+            try:
+                del app.pt
+            except AttributeError:
+                pass
         app.primary_master_node = None
         super(PrimaryNotificationsHandler, self).connectionClosed(conn)
...
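
Deleting `app.pt`, rather than assigning None, makes any code path that still
dereferences the partition table fail with AttributeError until a fresh table
is received from the master, and the try/except keeps the clear idempotent
when no table was ever loaded. A standalone toy illustration of the idiom
(not NEO code):

    class App(object):
        pass

    app = App()
    app.pt = object()  # installed while connected to the primary master

    def clear_partition_table(app):
        # Mirrors connectionClosed(): clearing twice, or before any
        # table was received, must not fail.
        try:
            del app.pt
        except AttributeError:
            pass

    clear_partition_table(app)
    clear_partition_table(app)  # idempotent: second call is a no-op
    try:
        app.pt  # stale readers now fail loudly...
    except AttributeError:
        print("...and must wait for reconnection to the master")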
@@ -22,7 +22,7 @@ from neo.lib.locking import Lock
 from neo.lib.protocol import NodeTypes, Packets
 from neo.lib.connection import MTClientConnection, ConnectionClosed
 from neo.lib.exception import NodeNotReady
-from .exception import NEOStorageError
+from .exception import NEOPrimaryMasterLost, NEOStorageError
 
 # How long before we might retry a connection to a node to which connection
 # failed in the past.
@@ -52,6 +52,8 @@ class ConnectionPool(object):
     def _initNodeConnection(self, node):
         """Init a connection to a given storage node."""
         app = self.app
+        if app.master_conn is None:
+            raise NEOPrimaryMasterLost
         logging.debug('trying to connect to %s - %s', node, node.getState())
         conn = MTClientConnection(app, app.storage_event_handler, node,
             dispatcher=app.dispatcher)
@@ -123,9 +125,11 @@ class ConnectionPool(object):
                 # state can have changed during connection attempt.
                 elif node.isRunning():
                     new_cell_list.append(cell)
-            if not new_cell_list or self.app.master_conn is None:
+            if not new_cell_list:
                 break
             cell_list = new_cell_list
+        if self.app.master_conn is None:
+            raise NEOPrimaryMasterLost
 
     def getConnForNode(self, node):
         """Return a locked connection object to a given node
...
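
The two pool changes cooperate: `_initNodeConnection` refuses to open any
storage connection while the master link is down, and the cell-iteration loop
re-checks the master connection once all candidate cells are exhausted, so
callers see NEOPrimaryMasterLost instead of spinning over unreachable cells.
A hedged sketch of the resulting caller-side contract; the loop shape and the
`ask_object` parameter are assumptions for illustration, not the exact code
of the client application:

    from neo.client.exception import NEOPrimaryMasterLost, NEOStorageError

    def load_from_storage(app, oid, ask_object):
        # iterateForObject yields (node, conn) pairs for readable cells
        # and now raises NEOPrimaryMasterLost when the master is gone,
        # instead of silently running out of cells to try.
        for node, conn in app.cp.iterateForObject(oid):
            answer = ask_object(conn, oid)
            if answer is not None:
                return answer
        # NEOPrimaryMasterLost is deliberately not caught here: it is a
        # ReadConflictError, so ZODB aborts and the transaction restarts.
        raise NEOStorageError("no storage node answered for oid %r" % (oid,))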
@@ -586,7 +586,12 @@ class NodeManager(object):
             logging.debug('droping node %r (%r), found with %s '
                 '%s %s %s', node, node.isConnected(), *log_args)
             if node.isConnected():
-                # cut this connection, node removed by handler
+                # Cut this connection, node removed by handler.
+                # It's important for a storage to disconnect nodes that
+                # aren't connected to the primary master, in order to
+                # avoid conflict of node id. The clients will first
+                # reconnect to the master because they cleared their
+                # partition table upon disconnection.
                 node.getConnection().close()
             self.remove(node)
         else:
...
@@ -22,6 +22,7 @@ import unittest
 from thread import get_ident
 from zlib import compress
 from persistent import Persistent, GHOST
+from transaction.interfaces import TransientError
 from ZODB import DB, POSException
 from ZODB.DB import TransactionalUndo
 from neo.storage.transactions import TransactionManager, \
@@ -36,6 +37,7 @@ from neo.lib.util import add64, makeChecksum, p64, u64
 from neo.client.exception import NEOStorageError
 from neo.client.pool import CELL_CONNECTED, CELL_GOOD
 from neo.master.handlers.client import ClientServiceHandler
+from neo.storage.handlers.client import ClientOperationHandler
 from neo.storage.handlers.initialization import InitializationHandler
 
 class PCounter(Persistent):
@@ -979,10 +981,6 @@ class Test(NEOThreadedTest):
             cluster.stop()
 
     def testClientReconnection(self):
-        conn = [None]
-        def getConnForNode(orig, node):
-            self.assertTrue(node.isRunning())
-            return conn.pop()
         cluster = NEOCluster()
         try:
             cluster.start()
@@ -1012,15 +1010,11 @@ class Test(NEOThreadedTest):
             client.close()
             self.tic()
 
-            # Check reconnection to storage.
+            # Check reconnection to the master and storage.
-            with Patch(cluster.client.cp, getConnForNode=getConnForNode):
-                self.assertFalse(cluster.client.history(x1._p_oid))
-            self.assertFalse(conn)
             self.assertTrue(cluster.client.history(x1._p_oid))
+            self.assertIsNot(None, cluster.client.master_conn)
 
-            # Check successful reconnection to master.
             t1.begin()
-            self.assertEqual(x1._p_changed ,None)
+            self.assertEqual(x1._p_changed, None)
             self.assertEqual(x1.value, 1)
         finally:
             cluster.stop()
@@ -1280,6 +1274,30 @@ class Test(NEOThreadedTest):
                 conn.em.poll(1)
             self.assertFalse(conn.isClosed())
 
+    def testClientDisconnectedFromMaster(self):
+        def disconnect(conn, packet):
+            if isinstance(packet, Packets.AskObject):
+                m2c.close()
+                #return True
+        cluster = NEOCluster()
+        try:
+            cluster.start()
+            t, c = cluster.getTransaction()
+            m2c, = cluster.master.getConnectionList(cluster.client)
+            cluster.client._cache.clear()
+            with cluster.client.filterConnection(cluster.storage) as c2s:
+                c2s.add(disconnect)
+                # Storages are currently notified of clients that get
+                # disconnected from the master and disconnect them in turn.
+                # Should it change, the clients would have to disconnect on
+                # their own.
+                self.assertRaises(TransientError, getattr, c, "root")
+            with Patch(ClientOperationHandler,
+                       askObject=lambda orig, self, conn, *args: conn.close()):
+                self.assertRaises(NEOStorageError, getattr, c, "root")
+        finally:
+            cluster.stop()
+
 if __name__ == "__main__":
     unittest.main()