Commit 2851a274 authored by Julien Muchembled

client: simplify connection management in transaction contexts

With the previous commit, there is no longer any point in distinguishing the
storage nodes for which we only check serials.
parent ab435b28
......@@ -482,9 +482,8 @@ class Application(ThreadedApplication):
' with new locking TID %s', dump(ttid), dump(serial))
txn_context.locking_tid = serial
packet = Packets.AskRebaseTransaction(ttid, serial)
for uuid, status in txn_context.involved_nodes.iteritems():
if status < 2:
self._askStorageForWrite(txn_context, uuid, packet)
for uuid in txn_context.conn_dict:
self._askStorageForWrite(txn_context, uuid, packet)
else:
if data is CHECKED_SERIAL:
raise ReadConflictError(oid=oid,
......@@ -518,9 +517,11 @@ class Application(ThreadedApplication):
conn = txn_context.conn_dict[uuid]
try:
return conn.ask(packet, queue=txn_context.queue)
except AttributeError:
if conn is not None:
raise
except ConnectionClosed:
txn_context.involved_nodes[uuid] = 2
del txn_context.conn_dict[uuid]
txn_context.conn_dict[uuid] = None
def waitResponses(self, queue):
"""Wait for all requests to be answered (or their connection to be
......@@ -551,21 +552,21 @@ class Application(ThreadedApplication):
packet = Packets.AskStoreTransaction(ttid, str(transaction.user),
str(transaction.description), ext, txn_context.cache_dict)
queue = txn_context.queue
involved_nodes = txn_context.involved_nodes
conn_dict = txn_context.conn_dict
# Ask in parallel all involved storage nodes to commit object metadata.
# Nodes that store the transaction metadata get a special packet.
trans_nodes = txn_context.write(self, packet, ttid)
packet = Packets.AskVoteTransaction(ttid)
for uuid, status in involved_nodes.iteritems():
if status < 2 and uuid not in trans_nodes:
for uuid in conn_dict:
if uuid not in trans_nodes:
self._askStorageForWrite(txn_context, uuid, packet)
self.waitStoreResponses(txn_context)
if 2 in involved_nodes.itervalues(): # unlikely
if None in conn_dict.itervalues(): # unlikely
# If some writes failed, we must first check whether
# all oids have been locked by at least one node.
failed = {node.getUUID(): node.isRunning()
for node in self.nm.getStorageList()
if involved_nodes.get(node.getUUID()) == 2}
if conn_dict.get(node.getUUID(), 0) is None}
if txn_context.lockless_dict:
getCellList = self.pt.getCellList
for offset, uuid_set in txn_context.lockless_dict.iteritems():
......@@ -616,10 +617,11 @@ class Application(ThreadedApplication):
# forever.
p = Packets.AbortTransaction(txn_context.ttid, ())
for conn in txn_context.conn_dict.itervalues():
try:
conn.send(p)
except ConnectionClosed:
pass
if conn is not None:
try:
conn.send(p)
except ConnectionClosed:
pass
# Because we want to be sure that the involved nodes are notified,
# we still have to send the full list to the master. Most of the
# time, the storage nodes get 2 AbortTransaction packets, and the
......@@ -633,7 +635,7 @@ class Application(ThreadedApplication):
else:
try:
notify(Packets.AbortTransaction(txn_context.ttid,
txn_context.involved_nodes))
txn_context.conn_dict))
except ConnectionClosed:
pass
# We don't need to flush queue, as it won't be reused by future
......@@ -742,7 +744,7 @@ class Application(ThreadedApplication):
# conflicts. For example, if a network failure happened
# only between the client and the storage, the latter would
# still be readable until we commit.
if txn_context.involved_nodes.get(cell.getUUID(), 0) < 2]
if txn_context.conn_dict.get(cell.getUUID(), 0) is not None]
storage_conn = getConnForNode(
min(cell_list, key=getCellSortKey).getNode())
storage_conn.ask(Packets.AskObjectUndoSerial(ttid,
......@@ -949,6 +951,6 @@ class Application(ThreadedApplication):
assert oid not in txn_context.data_dict, oid
packet = Packets.AskCheckCurrentSerial(ttid, oid, serial)
txn_context.data_dict[oid] = CHECKED_SERIAL, serial, txn_context.write(
self, packet, oid, 0, oid=oid)
self, packet, oid, oid=oid)
self._waitAnyTransactionMessage(txn_context, False)
......@@ -99,9 +99,7 @@ class StorageAnswersHandler(AnswerBaseHandler):
conn.ask(Packets.AskRebaseObject(ttid, oid),
queue=queue, oid=oid)
except ConnectionClosed:
uuid = conn.getUUID()
txn_context.involved_nodes[uuid] = 2
del txn_context.conn_dict[uuid]
txn_context.conn_dict[conn.getUUID()] = None
def answerRebaseObject(self, conn, conflict, oid):
if conflict:
......@@ -116,7 +114,7 @@ class StorageAnswersHandler(AnswerBaseHandler):
# We should still be waiting for an answer from this node,
# unless we lost connection.
assert conn.uuid in txn_context.data_dict[oid][2] or \
txn_context.involved_nodes[conn.uuid] == 2
txn_context.conn_dict[conn.uuid] is None
return
assert oid in txn_context.data_dict
if serial <= txn_context.conflict_dict.get(oid, ''):
......
......@@ -49,36 +49,26 @@ class Transaction(object):
self.conflict_dict = {} # {oid: serial}
# resolved conflicts
self.resolved_dict = {} # {oid: serial}
# Keys are node ids instead of Node objects because a node may
# disappear from the cluster. In any case, we always have to check
# if the id is still known by the NodeManager.
# status: 0 -> check only, 1 -> store, 2 -> failed
self.involved_nodes = {} # {node_id: status}
# involved storage nodes; connection is None if connection was lost
self.conn_dict = {} # {node_id: connection}
def wakeup(self, conn):
self.queue.put((conn, _WakeupPacket, {}))
def write(self, app, packet, object_id, store=1, **kw):
def write(self, app, packet, object_id, **kw):
uuid_list = []
pt = app.pt
involved = self.involved_nodes
conn_dict = self.conn_dict
object_id = pt.getPartition(object_id)
for cell in pt.getCellList(object_id):
node = cell.getNode()
uuid = node.getUUID()
status = involved.get(uuid, -1)
if status < store:
involved[uuid] = store
elif status > 1:
continue
if status < 0:
conn = self.conn_dict[uuid] = app.cp.getConnForNode(node)
else:
conn = self.conn_dict[uuid]
if conn is not None:
try:
try:
if status < 0 and self.locking_tid and 'oid' in kw:
conn = conn_dict[uuid]
except KeyError:
conn = conn_dict[uuid] = app.cp.getConnForNode(node)
if self.locking_tid and 'oid' in kw:
# A deadlock happened but this node is not aware of it.
# Tell it to write-lock with the same locking tid as
# for the other nodes. The condition on kw is to
......@@ -86,13 +76,13 @@ class Transaction(object):
# transaction metadata.
conn.ask(Packets.AskRebaseTransaction(
self.ttid, self.locking_tid), queue=self.queue)
conn.ask(packet, queue=self.queue, **kw)
uuid_list.append(uuid)
continue
except ConnectionClosed:
pass
del self.conn_dict[uuid]
involved[uuid] = 2
conn.ask(packet, queue=self.queue, **kw)
uuid_list.append(uuid)
except AttributeError:
if conn is not None:
raise
except ConnectionClosed:
conn_dict[uuid] = None
if uuid_list:
return uuid_list
raise NEOStorageError(
......@@ -146,9 +136,9 @@ class Transaction(object):
self.cache_dict[oid] = data
def nodeLost(self, app, uuid):
# The following 2 lines are sometimes redundant with the 2 in write().
self.involved_nodes[uuid] = 2
self.conn_dict.pop(uuid, None)
# The following line is sometimes redundant
# with the one in `except ConnectionClosed:` clauses.
self.conn_dict[uuid] = None
for oid in list(self.data_dict):
self.written(app, uuid, oid)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment