Commit 652f1f0d authored by Julien Muchembled

storage: fix assertion failure in case of connection reset with a client node

Here is what happened after simulating a network failure between a client and
a storage:

C8 (client node):

DEBUG   recv failed for <SSLSocketConnectorIPv6 at 0x7f8198027f90 fileno 17 ('xxxx:xxxx:120:cd8::90a1', 53970), opened to ('xxxx:xxxx:60:4c2c::25c3', 39085)>: ECONNRESET (Connection reset by peer)
DEBUG   connection closed for <MTClientConnection(uuid=S2, address=[xxxx:xxxx:60:4c2c::25c3]:39085, handler=StorageEventHandler, closed, client) at 7f81939a0950>
DEBUG   connection started for <MTClientConnection(uuid=S2, address=[xxxx:xxxx:60:4c2c::25c3]:39085, handler=StorageEventHandler, fd=17, on_close=onConnectionClosed, connecting, client) at 7f8192eb17d0>
PACKET  #0x0000 RequestIdentification          > S2 ([xxxx:xxxx:60:4c2c::25c3]:39085)        | (<EnumItem CLIENT (2)>, -536870904, None, '...', [], 1535555463.455761)
DEBUG   SSL handshake done for <SSLSocketConnectorIPv6 at 0x7f8192eb1850 fileno 17 ('xxxx:xxxx:120:cd8::90a1', 54014), opened to ('xxxx:xxxx:60:4c2c::25c3', 39085)>: ECDHE-RSA-AES256-GCM-SHA384 256
DEBUG   connection completed for <MTClientConnection(uuid=S2, address=[xxxx:xxxx:60:4c2c::25c3]:39085, handler=StorageEventHandler, fd=17, on_close=onConnectionClosed, client) at 7f8192eb17d0> (from xxxx:xxxx:120:cd8::90a1:54014)
DEBUG   <SSLSocketConnectorIPv6 at 0x7f8192eb1850 fileno 17 ('xxxx:xxxx:120:cd8::90a1', 54014), opened to ('xxxx:xxxx:60:4c2c::25c3', 39085)> closed in recv
DEBUG   connection closed for <MTClientConnection(uuid=S2, address=[xxxx:xxxx:60:4c2c::25c3]:39085, handler=StorageEventHandler, closed, client) at 7f8192eb17d0>
ERROR   Connection to <StorageNode(uuid=S2, address=[xxxx:xxxx:60:4c2c::25c3]:39085, state=RUNNING, connection=None, not identified) at 7f81a8874690> failed

S2 (storage node):

DEBUG   accepted a connection from xxxx:xxxx:120:cd8::90a1:54014
DEBUG   SSL handshake done for <SSLSocketConnectorIPv6 at 0x7f657144a910 fileno 22 ('xxxx:xxxx:60:4c2c::25c3', 39085), opened from ('xxxx:xxxx:120:cd8::90a1', 54014)>: ECDHE-RSA-AES256-GCM-SHA384 256
DEBUG   connection completed for <ServerConnection(uuid=None, address=[xxxx:xxxx:120:cd8::90a1]:54014, handler=IdentificationHandler, fd=22, server) at 7f657144a090> (from xxxx:xxxx:60:4c2c::25c3:39085)
PACKET  #0x0000 RequestIdentification          < None ([xxxx:xxxx:120:cd8::90a1]:54014)         | (<EnumItem CLIENT (2)>, -536870904, None, '...', [], 1535555463.455761)
DEBUG   connection closed for <ServerConnection(uuid=None, address=[xxxx:xxxx:120:cd8::90a1]:54014, handler=IdentificationHandler, closed, server) at 7f657144a090>
WARNING A connection was lost during identification
ERROR   Pre-mortem data:
ERROR   Traceback (most recent call last):
ERROR     File "neo/storage/app.py", line 194, in run
ERROR       self._run()
ERROR     File "neo/storage/app.py", line 225, in _run
ERROR       self.doOperation()
ERROR     File "neo/storage/app.py", line 310, in doOperation
ERROR       poll()
ERROR     File "neo/storage/app.py", line 134, in _poll
ERROR       self.em.poll(1)
ERROR     File "neo/lib/event.py", line 160, in poll
ERROR       to_process.process()
ERROR     File "neo/lib/connection.py", line 499, in process
ERROR       self._handlers.handle(self, self._queue.pop(0))
ERROR     File "neo/lib/connection.py", line 85, in handle
ERROR       self._handle(connection, packet)
ERROR     File "neo/lib/connection.py", line 100, in _handle
ERROR       pending[0][1].packetReceived(connection, packet)
ERROR     File "neo/lib/handler.py", line 123, in packetReceived
ERROR       self.dispatch(*args)
ERROR     File "neo/lib/handler.py", line 72, in dispatch
ERROR       method(conn, *args, **kw)
ERROR     File "neo/storage/handlers/identification.py", line 56, in requestIdentification
ERROR       assert not node.isConnected(), node
ERROR   AssertionError: <ClientNode(uuid=C8, state=RUNNING, connection=<ServerConnection(uuid=C8, address=[xxxx:xxxx:120:cd8::90a1]:53970, handler=ClientOperationHandler, fd=18, on_close=onConnectionClosed, server) at 7f657147d7d0>) at 7f65714d6cd0>
parent b54c1c68
@@ -333,6 +333,7 @@ class Connection(BaseConnection):
         return r, flags
 
     def setOnClose(self, callback):
+        assert not self.isClosed(), self
         self._on_close = callback
 
     def isClient(self):
......
@@ -130,6 +130,10 @@ class Node(object):
             # the full-fledged functionality, and it is simpler this way.
             if not force or conn.getPeerId() is not None or \
                type(conn.getHandler()) is not type(connection.getHandler()):
+                # It may also happen in case of a network failure that is only
+                # noticed by the peer. We'd like to accept the new connection
+                # immediately but it's quite complicated. At worst (keepalive
+                # packets dropped), 'conn' will be closed in ~ 1 minute.
                 raise ProtocolError("already connected")
             def on_closed():
                 self._connection = connection
@@ -137,7 +141,6 @@ class Node(object):
                 self.setIdentified()
             conn.setOnClose(on_closed)
             conn.close()
-        assert not connection.isClosed(), connection
         connection.setOnClose(self.onConnectionClosed)
 
     def getConnection(self):
......
@@ -53,16 +53,17 @@ class IdentificationHandler(EventHandler):
                     handler = ClientReadOnlyOperationHandler
                 else:
                     handler = ClientOperationHandler
-                assert not node.isConnected(), node
                 assert node.isRunning(), node
+                force = False
             elif node_type == NodeTypes.STORAGE:
                 handler = StorageOperationHandler
+                force = app.uuid < uuid
             else:
                 raise ProtocolError('reject non-client-or-storage node')
             # apply the handler and set up the connection
             handler = handler(self.app)
             conn.setHandler(handler)
-            node.setConnection(conn, app.uuid < uuid)
+            node.setConnection(conn, force)
         # accept the identification and trigger an event
         conn.answer(Packets.AcceptIdentification(NodeTypes.STORAGE, uuid and
             app.uuid, app.pt.getPartitions(), app.pt.getReplicas(), uuid))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment