Commit 8296c3c7 authored by Yoshinori Okuji

A lot of bugfixes.

git-svn-id: https://svn.erp5.org/repos/neo/branches/prototype3@222 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent 440d0981
This diff is collapsed.
...@@ -66,7 +66,7 @@ class Dispatcher(Thread): ...@@ -66,7 +66,7 @@ class Dispatcher(Thread):
if app.pt is not None and app.pt.operational(): if app.pt is not None and app.pt.operational():
# Connected to primary master node and got all informations # Connected to primary master node and got all informations
break break
app.node_not_ready = 0 app.local_var.node_not_ready = 0
if app.primary_master_node is None: if app.primary_master_node is None:
# Try with master node defined in config # Try with master node defined in config
addr, port = app.master_node_list[master_index].split(':') addr, port = app.master_node_list[master_index].split(':')
...@@ -110,7 +110,7 @@ class Dispatcher(Thread): ...@@ -110,7 +110,7 @@ class Dispatcher(Thread):
elif app.primary_master_node.getServer() != (addr, port): elif app.primary_master_node.getServer() != (addr, port):
# Master node changed, connect to new one # Master node changed, connect to new one
break break
elif app.node_not_ready: elif app.local_var.node_not_ready:
# Wait a bit and reask again # Wait a bit and reask again
break break
elif app.pt is not None and app.pt.operational(): elif app.pt is not None and app.pt.operational():
......
...@@ -45,6 +45,36 @@ class ClientEventHandler(EventHandler): ...@@ -45,6 +45,36 @@ class ClientEventHandler(EventHandler):
# put message in request queue # put message in request queue
dispatcher._request_queue.put((conn, packet)) dispatcher._request_queue.put((conn, packet))
def _dealWithStorageFailure(self, conn, node, state):
    """Clean up after a failed storage node connection and report it.

    Three things happen, in order:

    1. ``node`` is removed from the connection pool (``app.cp``).
    2. Every entry of ``self.dispatcher.message_table`` whose key starts
       with ``id(conn)`` is removed, and each distinct queue that was
       registered there receives a single ``(conn, None)`` item.
    3. If ``app.master_conn`` is set, a ``notifyNodeInformation`` packet
       describing ``node`` with the given ``state`` is queued on it while
       holding that connection's lock.

    conn  -- the connection that failed; matched by identity via id(conn).
    node  -- the node object for that connection; must provide getServer()
             and getUUID().
    state -- the node state to report to the primary master node.
    """
    app = self.app
    # Remove from pool connection
    app.cp.removeConnection(node)

    # Put fake packets to task queues.
    message_table = self.dispatcher.message_table
    conn_id = id(conn)
    queue_set = set()
    # Iterate over a snapshot of the keys: entries are popped inside the
    # loop, and mutating a dict while iterating its live key view raises
    # RuntimeError on Python 3.
    for key in list(message_table.keys()):
        if key[0] == conn_id:
            queue_set.add(message_table.pop(key))
    # A set is used so that a queue registered under several message ids
    # is only notified once.
    for queue in queue_set:
        queue.put((conn, None))

    # Notify the primary master node of the failure.
    # Use a separate name so the failed connection is not shadowed.
    master_conn = app.master_conn
    if master_conn is None:
        return
    master_conn.lock()
    try:
        msg_id = master_conn.getNextId()
        p = Packet()
        ip_address, port = node.getServer()
        node_list = [(STORAGE_NODE_TYPE, ip_address, port,
                      node.getUUID(), state)]
        p.notifyNodeInformation(msg_id, node_list)
        master_conn.addPacket(p)
    finally:
        master_conn.unlock()
def connectionFailed(self, conn): def connectionFailed(self, conn):
app = self.app app = self.app
uuid = conn.getUUID() uuid = conn.getUUID()
...@@ -59,15 +89,16 @@ class ClientEventHandler(EventHandler): ...@@ -59,15 +89,16 @@ class ClientEventHandler(EventHandler):
self.dispatcher.connectToPrimaryMasterNode(app) self.dispatcher.connectToPrimaryMasterNode(app)
else: else:
# Connection to a storage node failed # Connection to a storage node failed
app.storage_node = -1 node = app.nm.getNodeByServer(conn.getAddress())
if isinstance(node, StorageNode):
self._dealWithStorageFailure(conn, node, TEMPORARILY_DOWN_STATE)
EventHandler.connectionFailed(self, conn) EventHandler.connectionFailed(self, conn)
def connectionClosed(self, conn): def connectionClosed(self, conn):
uuid = conn.getUUID() uuid = conn.getUUID()
app = self.app app = self.app
if app.master_conn is None: if app.master_conn is not None and uuid == app.master_conn.getUUID():
EventHandler.connectionClosed(self, conn)
elif uuid == app.master_conn.getUUID():
logging.critical("connection to primary master node closed") logging.critical("connection to primary master node closed")
# Close connection # Close connection
app.master_conn.close() app.master_conn.close()
...@@ -76,29 +107,14 @@ class ClientEventHandler(EventHandler): ...@@ -76,29 +107,14 @@ class ClientEventHandler(EventHandler):
logging.critical("trying reconnection to master node...") logging.critical("trying reconnection to master node...")
self.dispatcher.connectToPrimaryMasterNode(app) self.dispatcher.connectToPrimaryMasterNode(app)
else: else:
app = self.app node = app.nm.getNodeByServer(conn.getAddress())
node = app.nm.getNodeByUUID(uuid)
if node is not None:
logging.info("connection to storage node %s closed",
node.getServer())
if isinstance(node, StorageNode): if isinstance(node, StorageNode):
# Notify primary master node that a storage node is temporarily down # Notify primary master node that a storage node is temporarily down
conn = app.master_conn logging.info("connection to storage node %s closed",
if conn is not None: node.getServer())
conn.lock() self._dealWithStorageFailure(conn, node, TEMPORARILY_DOWN_STATE)
try:
msg_id = conn.getNextId() EventHandler.connectionClosed(self, conn)
p = Packet()
ip_address, port = node.getServer()
node_list = [(STORAGE_NODE_TYPE, ip_address, port, node.getUUID(),
TEMPORARILY_DOWN_STATE),]
p.notifyNodeInformation(msg_id, node_list)
conn.addPacket(p)
finally:
conn.unlock()
# Remove from pool connection
app.cp.removeConnection(node)
EventHandler.connectionClosed(self, conn)
def timeoutExpired(self, conn): def timeoutExpired(self, conn):
uuid = conn.getUUID() uuid = conn.getUUID()
...@@ -109,24 +125,12 @@ class ClientEventHandler(EventHandler): ...@@ -109,24 +125,12 @@ class ClientEventHandler(EventHandler):
logging.critical("trying reconnection to master node...") logging.critical("trying reconnection to master node...")
self.dispatcher.connectToPrimaryMasterNode(app) self.dispatcher.connectToPrimaryMasterNode(app)
else: else:
node = app.nm.getNodeByUUID(uuid) node = app.nm.getNodeByServer(conn.getAddress())
if isinstance(node, StorageNode): if isinstance(node, StorageNode):
# Notify primary master node that a storage node is temporarily down # Notify primary master node that a storage node is
conn = app.master_conn # temporarily down.
if conn is not None: self._dealWithStorageFailure(conn, node, TEMPORARILY_DOWN_STATE)
conn.lock()
try:
msg_id = conn.getNextId()
p = Packet()
ip_address, port = node.getServer()
node_list = [(STORAGE_NODE_TYPE, ip_address, port, node.getUUID(),
TEMPORARILY_DOWN_STATE),]
p.notifyNodeInformation(msg_id, node_list)
conn.addPacket(p)
finally:
conn.unlock()
# Remove from pool connection
app.cp.removeConnection(node)
EventHandler.timeoutExpired(self, conn) EventHandler.timeoutExpired(self, conn)
def peerBroken(self, conn): def peerBroken(self, conn):
...@@ -138,31 +142,17 @@ class ClientEventHandler(EventHandler): ...@@ -138,31 +142,17 @@ class ClientEventHandler(EventHandler):
logging.critical("trying reconnection to master node...") logging.critical("trying reconnection to master node...")
self.dispatcher.connectToPrimaryMasterNode(app) self.dispatcher.connectToPrimaryMasterNode(app)
else: else:
node = app.nm.getNodeByUUID(uuid) node = app.nm.getNodeByServer(conn.getAddress())
if isinstance(node, StorageNode): if isinstance(node, StorageNode):
# Notify primary master node that a storage node is broken self._dealWithStorageFailure(conn, node, BROKEN_STATE)
conn = app.master_conn
if conn is not None:
conn.lock()
try:
msg_id = conn.getNextId()
p = Packet()
ip_address, port = node.getServer()
node_list = [(STORAGE_NODE_TYPE, ip_address, port, node.getUUID(),
BROKEN_STATE),]
p.notifyNodeInformation(msg_id, node_list)
conn.addPacket(p)
finally:
conn.unlock()
# Remove from pool connection
app.cp.removeConnection(node)
EventHandler.peerBroken(self, conn) EventHandler.peerBroken(self, conn)
def handleNotReady(self, conn, packet, message): def handleNotReady(self, conn, packet, message):
if isinstance(conn, MTClientConnection): if isinstance(conn, MTClientConnection):
app = self.app app = self.app
app.node_not_ready = 1 app.local_var.node_not_ready = 1
else: else:
self.handleUnexpectedPacket(conn, packet) self.handleUnexpectedPacket(conn, packet)
......
...@@ -167,12 +167,15 @@ class EpollEventManager(object): ...@@ -167,12 +167,15 @@ class EpollEventManager(object):
def poll(self, timeout = 1): def poll(self, timeout = 1):
rlist, wlist = self.epoll.poll(timeout) rlist, wlist = self.epoll.poll(timeout)
for fd in rlist: for fd in rlist:
conn = self.connection_dict[fd]
conn.lock()
try: try:
conn.readable() conn = self.connection_dict[fd]
finally: conn.lock()
conn.unlock() try:
conn.readable()
finally:
conn.unlock()
except KeyError:
pass
for fd in wlist: for fd in wlist:
# This can fail, if a connection is closed in readable(). # This can fail, if a connection is closed in readable().
......
from time import time from time import time
import logging
from neo.protocol import RUNNING_STATE, TEMPORARILY_DOWN_STATE, DOWN_STATE, BROKEN_STATE, \ from neo.protocol import RUNNING_STATE, TEMPORARILY_DOWN_STATE, DOWN_STATE, BROKEN_STATE, \
MASTER_NODE_TYPE, STORAGE_NODE_TYPE, CLIENT_NODE_TYPE MASTER_NODE_TYPE, STORAGE_NODE_TYPE, CLIENT_NODE_TYPE
from neo.util import dump
class Node(object): class Node(object):
"""This class represents a node.""" """This class represents a node."""
......
...@@ -221,10 +221,6 @@ class Application(object): ...@@ -221,10 +221,6 @@ class Application(object):
for conn in em.getConnectionList(): for conn in em.getConnectionList():
conn.setHandler(handler) conn.setHandler(handler)
# Forget all client nodes.
for node in nm.getClientNodeList():
nm.remove(node)
# Forget all unfinished data. # Forget all unfinished data.
self.dm.dropUnfinishedData() self.dm.dropUnfinishedData()
......
...@@ -99,6 +99,7 @@ class StorageEventHandler(EventHandler): ...@@ -99,6 +99,7 @@ class StorageEventHandler(EventHandler):
self.handleUnexpectedPacket(conn, packet) self.handleUnexpectedPacket(conn, packet)
return return
logging.debug('handleNotifyNodeInformation: node_list = %r', node_list)
app = self.app app = self.app
node = app.nm.getNodeByUUID(uuid) node = app.nm.getNodeByUUID(uuid)
if not isinstance(node, MasterNode) \ if not isinstance(node, MasterNode) \
...@@ -142,12 +143,15 @@ class StorageEventHandler(EventHandler): ...@@ -142,12 +143,15 @@ class StorageEventHandler(EventHandler):
if state == RUNNING_STATE: if state == RUNNING_STATE:
n = app.nm.getNodeByUUID(uuid) n = app.nm.getNodeByUUID(uuid)
if n is None: if n is None:
logging.debug('adding client node %s', dump(uuid))
n = ClientNode(uuid = uuid) n = ClientNode(uuid = uuid)
app.nm.add(n) app.nm.add(n)
assert app.nm.getNodeByUUID(uuid) is n
else: else:
self.dealWithClientFailure(uuid) self.dealWithClientFailure(uuid)
n = app.nm.getNodeByUUID(uuid) n = app.nm.getNodeByUUID(uuid)
if n is not None: if n is not None:
logging.debug('removing client node %s', dump(uuid))
app.nm.remove(n) app.nm.remove(n)
def handleAskLastIDs(self, conn, packet): def handleAskLastIDs(self, conn, packet):
......
...@@ -137,7 +137,7 @@ class OperationEventHandler(StorageEventHandler): ...@@ -137,7 +137,7 @@ class OperationEventHandler(StorageEventHandler):
else: else:
# If I do not know such a node, and it is not even a master # If I do not know such a node, and it is not even a master
# node, simply reject it. # node, simply reject it.
logging.error('reject an unknown node') logging.error('reject an unknown node %s', dump(uuid))
conn.addPacket(Packet().notReady(packet.getId(), conn.addPacket(Packet().notReady(packet.getId(),
'unknown node')) 'unknown node'))
conn.abort() conn.abort()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment