Give new ids to clients whose ids were already reallocated

Although the change applies to any node with a temporary id (all but storage), only clients don't have addresses and are therefore not recognizable. After a client is disconnected from the master and before it reconnects, another client may join the cluster and "steal" the id of the first client. This issue leads to stuck clients, failing in a loop with exceptions like the following one:

ERROR ZODB.Connection Couldn't load state for 0x0251
Traceback (most recent call last):
  File "ZODB/Connection.py", line 860, in setstate
    self._setstate(obj)
  File "ZODB/Connection.py", line 901, in _setstate
    p, serial = self._storage.load(obj._p_oid, '')
  File "neo/client/Storage.py", line 82, in load
    return self.app.load(oid)[:2]
  File "neo/client/app.py", line 353, in load
    data, tid, next_tid, _ = self._loadFromStorage(oid, tid, before_tid)
  File "neo/client/app.py", line 373, in _loadFromStorage
    for node, conn in self.cp.iterateForObject(oid, readable=True):
  File "neo/client/pool.py", line 91, in iterateForObject
    pt = self.app.pt
  File "neo/client/app.py", line 145, in __getattr__
    self._getMasterConnection()
  File "neo/client/app.py", line 214, in _getMasterConnection
    result = self.master_conn = self._connectToPrimaryNode()
  File "neo/client/app.py", line 246, in _connectToPrimaryNode
    handler=handler)
  File "neo/lib/threaded_app.py", line 154, in _ask
    _handlePacket(qconn, qpacket, kw, handler)
  File "neo/lib/threaded_app.py", line 135, in _handlePacket
    handler.dispatch(conn, packet, kw)
  File "neo/lib/handler.py", line 66, in dispatch
    method(conn, *args, **kw)
  File "neo/lib/handler.py", line 188, in error
    getattr(self, Errors[code])(conn, message)
  File "neo/client/handlers/__init__.py", line 23, in protocolError
    raise StorageError("protocol error: %s" % message)
StorageError: protocol error: already connected

Give new ids to clients whose ids were already reallocated
Although the change applies to any node with a temporary id (all but storage), only clients don't have addresses and are therefore not recognizable. After a client is disconnected from the master and before it reconnects, another client may join the cluster and "steal" the id of the first client. This issue leads to stuck clients, failing in a loop with exceptions like the following one:

ERROR ZODB.Connection Couldn't load state for 0x0251
Traceback (most recent call last):
  File "ZODB/Connection.py", line 860, in setstate
    self._setstate(obj)
  File "ZODB/Connection.py", line 901, in _setstate
    p, serial = self._storage.load(obj._p_oid, '')
  File "neo/client/Storage.py", line 82, in load
    return self.app.load(oid)[:2]
  File "neo/client/app.py", line 353, in load
    data, tid, next_tid, _ = self._loadFromStorage(oid, tid, before_tid)
  File "neo/client/app.py", line 373, in _loadFromStorage
    for node, conn in self.cp.iterateForObject(oid, readable=True):
  File "neo/client/pool.py", line 91, in iterateForObject
    pt = self.app.pt
  File "neo/client/app.py", line 145, in __getattr__
    self._getMasterConnection()
  File "neo/client/app.py", line 214, in _getMasterConnection
    result = self.master_conn = self._connectToPrimaryNode()
  File "neo/client/app.py", line 246, in _connectToPrimaryNode
    handler=handler)
  File "neo/lib/threaded_app.py", line 154, in _ask
    _handlePacket(qconn, qpacket, kw, handler)
  File "neo/lib/threaded_app.py", line 135, in _handlePacket
    handler.dispatch(conn, packet, kw)
  File "neo/lib/handler.py", line 66, in dispatch
    method(conn, *args, **kw)
  File "neo/lib/handler.py", line 188, in error
    getattr(self, Errors[code])(conn, message)
  File "neo/client/handlers/__init__.py", line 23, in protocolError
    raise StorageError("protocol error: %s" % message)
StorageError: protocol error: already connected
d752aadb · Julien Muchembled · b62b8dc3 · d752aadb · d752aadb
Commit d752aadb authored Nov 21, 2016 by Julien Muchembled
Hide whitespace changes
Inline Side-by-side

Showing with 27 additions and 7 deletions

neo/master/handlers/identification.py neo/master/handlers/identification.py +8 -4

neo/tests/threaded/test.py neo/tests/threaded/test.py +19 -3

No files found.
--- a/neo/master/handlers/identification.py
+++ b/neo/master/handlers/identification.py
@@ -32,12 +32,16 @@ class IdentificationHandler(MasterHandler):
        app = self.app
        if node:
            if node.isRunning():
-                # cloned/evil/buggy node connecting to us
-                raise ProtocolError('already connected')
+                if uuid > 0:
+                    # cloned/evil/buggy node connecting to us
+                    raise ProtocolError('already connected')
+                # The peer wants a temporary id that's already assigned.
+                # Let's give it another one.
+                node = uuid = None
            else:
                assert not node.isConnected()
-            node.setAddress(address)
-            node.setRunning()
+                node.setAddress(address)
+                node.setRunning()

        state = NodeStates.RUNNING
        if node_type == NodeTypes.CLIENT:

--- a/neo/tests/threaded/test.py
+++ b/neo/tests/threaded/test.py
@@ -1290,6 +1290,8 @@ class Test(NEOThreadedTest):
            m2c, = cluster.master.getConnectionList(cluster.client)
            cluster.client._cache.clear()
            c.cacheMinimize()
+            # Make the master disconnects the client when the latter is about
+            # to send a AskObject packet to the storage node.
            with cluster.client.filterConnection(cluster.storage) as c2s:
                c2s.add(disconnect)
                # Storages are currently notified of clients that get
@@ -1297,9 +1299,23 @@ class Test(NEOThreadedTest):
                # Should it change, the clients would have to disconnect on
                # their own.
                self.assertRaises(TransientError, getattr, c, "root")
-            with Patch(ClientOperationHandler,
-                    askObject=lambda orig, self, conn, *args: conn.close()):
-                self.assertRaises(NEOStorageError, getattr, c, "root")
+            uuid = cluster.client.uuid
+            # Let's use a second client to steal the node id of the first one.
+            client = cluster.newClient()
+            try:
+                client.sync()
+                self.assertEqual(uuid, client.uuid)
+                # The client reconnects successfully to the master and storage,
+                # with a different node id. This time, we get a different error
+                # if it's only disconnected from the storage.
+                with Patch(ClientOperationHandler,
+                        askObject=lambda orig, self, conn, *args: conn.close()):
+                    self.assertRaises(NEOStorageError, getattr, c, "root")
+                self.assertNotEqual(uuid, cluster.client.uuid)
+                # Second reconnection, for a successful load.
+                c.root
+            finally:
+                client.close()
        finally:
            cluster.stop()