Commit bf03a305 authored by Julien Muchembled's avatar Julien Muchembled

Drop initial implementation of deadlock resolution

It was disabled long time ago and NEO has evolved in such a way that the new
implementation will be completely different.
parent c184ab48
...@@ -417,8 +417,7 @@ class Application(ThreadedApplication): ...@@ -417,8 +417,7 @@ class Application(ThreadedApplication):
logging.debug('storing oid %s serial %s', dump(oid), dump(serial)) logging.debug('storing oid %s serial %s', dump(oid), dump(serial))
self._store(self._txn_container.get(transaction), oid, serial, data) self._store(self._txn_container.get(transaction), oid, serial, data)
def _store(self, txn_context, oid, serial, data, data_serial=None, def _store(self, txn_context, oid, serial, data, data_serial=None):
unlock=False):
ttid = txn_context['ttid'] ttid = txn_context['ttid']
if data is None: if data is None:
# This is some undo: either a no-data object (undoing object # This is some undo: either a no-data object (undoing object
...@@ -451,7 +450,7 @@ class Application(ThreadedApplication): ...@@ -451,7 +450,7 @@ class Application(ThreadedApplication):
involved_nodes = txn_context['involved_nodes'] involved_nodes = txn_context['involved_nodes']
add_involved_nodes = involved_nodes.add add_involved_nodes = involved_nodes.add
packet = Packets.AskStoreObject(oid, serial, compression, packet = Packets.AskStoreObject(oid, serial, compression,
checksum, compressed_data, data_serial, ttid, unlock) checksum, compressed_data, data_serial, ttid)
for node, conn in self.cp.iterateForObject(oid): for node, conn in self.cp.iterateForObject(oid):
try: try:
conn.ask(packet, queue=queue, oid=oid, serial=serial) conn.ask(packet, queue=queue, oid=oid, serial=serial)
...@@ -499,27 +498,9 @@ class Application(ThreadedApplication): ...@@ -499,27 +498,9 @@ class Application(ThreadedApplication):
# To recover, we must ask storages to release locks we # To recover, we must ask storages to release locks we
# hold (to let possibly-competing transactions acquire # hold (to let possibly-competing transactions acquire
# them), and requeue our already-sent store requests. # them), and requeue our already-sent store requests.
# XXX: currently, brute-force is implemented: we send
# object data again.
# WARNING: not maintained code
logging.info('Deadlock avoidance triggered on %r:%r', logging.info('Deadlock avoidance triggered on %r:%r',
dump(oid), dump(serial)) dump(oid), dump(serial))
for store_oid, store_data in data_dict.iteritems(): raise NotImplementedError
store_serial = object_serial_dict[store_oid]
if store_data is CHECKED_SERIAL:
self._checkCurrentSerialInTransaction(txn_context,
store_oid, store_serial)
else:
if store_data is None:
# Some undo
logging.warning('Deadlock avoidance cannot reliably'
' work with undo, this must be implemented.')
conflict_serial = ZERO_TID
break
self._store(txn_context, store_oid, store_serial,
store_data, unlock=True)
else:
continue
else: else:
data = data_dict.pop(oid) data = data_dict.pop(oid)
if data is CHECKED_SERIAL: if data is CHECKED_SERIAL:
......
...@@ -156,7 +156,3 @@ class StorageAnswersHandler(AnswerBaseHandler): ...@@ -156,7 +156,3 @@ class StorageAnswersHandler(AnswerBaseHandler):
def answerFinalTID(self, conn, tid): def answerFinalTID(self, conn, tid):
self.app.setHandlerData(tid) self.app.setHandlerData(tid)
def alreadyPendingError(self, conn, message):
pass
...@@ -207,9 +207,6 @@ class EventHandler(object): ...@@ -207,9 +207,6 @@ class EventHandler(object):
def brokenNodeDisallowedError(self, conn, message): def brokenNodeDisallowedError(self, conn, message):
raise RuntimeError, 'broken node disallowed error: %s' % (message,) raise RuntimeError, 'broken node disallowed error: %s' % (message,)
def alreadyPendingError(self, conn, message):
logging.error('already pending error: %s', message)
def ack(self, conn, message): def ack(self, conn, message):
logging.debug("no error message: %s", message) logging.debug("no error message: %s", message)
......
...@@ -71,7 +71,6 @@ def ErrorCodes(): ...@@ -71,7 +71,6 @@ def ErrorCodes():
OID_DOES_NOT_EXIST OID_DOES_NOT_EXIST
PROTOCOL_ERROR PROTOCOL_ERROR
BROKEN_NODE BROKEN_NODE
ALREADY_PENDING
REPLICATION_ERROR REPLICATION_ERROR
CHECKING_ERROR CHECKING_ERROR
BACKEND_NOT_IMPLEMENTED BACKEND_NOT_IMPLEMENTED
...@@ -954,7 +953,6 @@ class StoreObject(Packet): ...@@ -954,7 +953,6 @@ class StoreObject(Packet):
PString('data'), PString('data'),
PTID('data_serial'), PTID('data_serial'),
PTID('tid'), PTID('tid'),
PBoolean('unlock'),
) )
_answer = PStruct('answer_store_object', _answer = PStruct('answer_store_object',
......
...@@ -28,7 +28,6 @@ from neo.lib.util import dump ...@@ -28,7 +28,6 @@ from neo.lib.util import dump
from neo.lib.bootstrap import BootstrapManager from neo.lib.bootstrap import BootstrapManager
from .checker import Checker from .checker import Checker
from .database import buildDatabaseManager from .database import buildDatabaseManager
from .exception import AlreadyPendingError
from .handlers import identification, initialization from .handlers import identification, initialization
from .handlers import master, hidden from .handlers import master, hidden
from .replicator import Replicator from .replicator import Replicator
...@@ -70,7 +69,6 @@ class Application(BaseApplication): ...@@ -70,7 +69,6 @@ class Application(BaseApplication):
# operation related data # operation related data
self.event_queue = None self.event_queue = None
self.event_queue_dict = None
self.operational = False self.operational = False
# ready is True when operational and got all informations # ready is True when operational and got all informations
...@@ -190,7 +188,6 @@ class Application(BaseApplication): ...@@ -190,7 +188,6 @@ class Application(BaseApplication):
conn.close() conn.close()
# create/clear event queue # create/clear event queue
self.event_queue = deque() self.event_queue = deque()
self.event_queue_dict = {}
try: try:
self.initialize() self.initialize()
self.doOperation() self.doOperation()
...@@ -308,28 +305,14 @@ class Application(BaseApplication): ...@@ -308,28 +305,14 @@ class Application(BaseApplication):
if not node.isHidden(): if not node.isHidden():
break break
def queueEvent(self, some_callable, conn=None, args=(), key=None, def queueEvent(self, some_callable, conn=None, args=()):
raise_on_duplicate=True):
event_queue_dict = self.event_queue_dict
n = event_queue_dict.get(key)
if n and raise_on_duplicate:
raise AlreadyPendingError()
msg_id = None if conn is None else conn.getPeerId() msg_id = None if conn is None else conn.getPeerId()
self.event_queue.append((key, some_callable, msg_id, conn, args)) self.event_queue.append((some_callable, msg_id, conn, args))
if key is not None:
event_queue_dict[key] = n + 1 if n else 1
def executeQueuedEvents(self): def executeQueuedEvents(self):
p = self.event_queue.popleft p = self.event_queue.popleft
event_queue_dict = self.event_queue_dict
for _ in xrange(len(self.event_queue)): for _ in xrange(len(self.event_queue)):
key, some_callable, msg_id, conn, args = p() some_callable, msg_id, conn, args = p()
if key is not None:
n = event_queue_dict[key] - 1
if n:
event_queue_dict[key] = n
else:
del event_queue_dict[key]
if conn is None: if conn is None:
some_callable(*args) some_callable(*args)
elif not conn.isClosed(): elif not conn.isClosed():
...@@ -344,9 +327,8 @@ class Application(BaseApplication): ...@@ -344,9 +327,8 @@ class Application(BaseApplication):
if self.event_queue is None: if self.event_queue is None:
return return
logging.info("Pending events:") logging.info("Pending events:")
for key, event, _msg_id, _conn, args in self.event_queue: for event, msg_id, conn, args in self.event_queue:
logging.info(' %r:%r: %r:%r %r %r', key, event.__name__, logging.info(' %r: %r %r', event.__name__, msg_id, conn)
_msg_id, _conn, args)
def newTask(self, iterator): def newTask(self, iterator):
try: try:
......
#
# Copyright (C) 2010-2017 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
class AlreadyPendingError(Exception):
pass
...@@ -20,7 +20,6 @@ from neo.lib.util import dump, makeChecksum, add64 ...@@ -20,7 +20,6 @@ from neo.lib.util import dump, makeChecksum, add64
from neo.lib.protocol import Packets, Errors, ProtocolError, \ from neo.lib.protocol import Packets, Errors, ProtocolError, \
ZERO_HASH, INVALID_PARTITION ZERO_HASH, INVALID_PARTITION
from ..transactions import ConflictError, DelayedError, NotRegisteredError from ..transactions import ConflictError, DelayedError, NotRegisteredError
from ..exception import AlreadyPendingError
import time import time
# Log stores taking (incl. lock delays) more than this many seconds. # Log stores taking (incl. lock delays) more than this many seconds.
...@@ -71,25 +70,17 @@ class ClientOperationHandler(EventHandler): ...@@ -71,25 +70,17 @@ class ClientOperationHandler(EventHandler):
conn.answer(Packets.AnswerVoteTransaction()) conn.answer(Packets.AnswerVoteTransaction())
def _askStoreObject(self, conn, oid, serial, compression, checksum, data, def _askStoreObject(self, conn, oid, serial, compression, checksum, data,
data_serial, ttid, unlock, request_time): data_serial, ttid, request_time):
try: try:
self.app.tm.storeObject(ttid, serial, oid, compression, self.app.tm.storeObject(ttid, serial, oid, compression,
checksum, data, data_serial, unlock) checksum, data, data_serial)
except ConflictError, err: except ConflictError, err:
# resolvable or not # resolvable or not
conn.answer(Packets.AnswerStoreObject(err.tid)) conn.answer(Packets.AnswerStoreObject(err.tid))
except DelayedError: except DelayedError:
# locked by a previous transaction, retry later # locked by a previous transaction, retry later
# If we are unlocking, we want queueEvent to raise self.app.queueEvent(self._askStoreObject, conn, (oid, serial,
# AlreadyPendingError, to avoid making client wait for an unneeded compression, checksum, data, data_serial, ttid, request_time))
# response.
try:
self.app.queueEvent(self._askStoreObject, conn, (oid, serial,
compression, checksum, data, data_serial, ttid,
unlock, request_time), key=(oid, ttid),
raise_on_duplicate=unlock)
except AlreadyPendingError:
conn.answer(Errors.AlreadyPending(dump(oid)))
except NotRegisteredError: except NotRegisteredError:
# transaction was aborted, cancel this event # transaction was aborted, cancel this event
logging.info('Forget store of %s:%s by %s delayed by %s', logging.info('Forget store of %s:%s by %s delayed by %s',
...@@ -105,7 +96,7 @@ class ClientOperationHandler(EventHandler): ...@@ -105,7 +96,7 @@ class ClientOperationHandler(EventHandler):
conn.answer(Packets.AnswerStoreObject(None)) conn.answer(Packets.AnswerStoreObject(None))
def askStoreObject(self, conn, oid, serial, def askStoreObject(self, conn, oid, serial,
compression, checksum, data, data_serial, ttid, unlock): compression, checksum, data, data_serial, ttid):
if 1 < compression: if 1 < compression:
raise ProtocolError('invalid compression value') raise ProtocolError('invalid compression value')
# register the transaction # register the transaction
...@@ -117,7 +108,7 @@ class ClientOperationHandler(EventHandler): ...@@ -117,7 +108,7 @@ class ClientOperationHandler(EventHandler):
else: else:
checksum = data = None checksum = data = None
self._askStoreObject(conn, oid, serial, compression, checksum, data, self._askStoreObject(conn, oid, serial, compression, checksum, data,
data_serial, ttid, unlock, time.time()) data_serial, ttid, time.time())
def askTIDsFrom(self, conn, min_tid, max_tid, length, partition): def askTIDsFrom(self, conn, min_tid, max_tid, length, partition):
conn.answer(Packets.AnswerTIDsFrom(self.app.dm.getReplicationTIDList( conn.answer(Packets.AnswerTIDsFrom(self.app.dm.getReplicationTIDList(
...@@ -186,11 +177,8 @@ class ClientOperationHandler(EventHandler): ...@@ -186,11 +177,8 @@ class ClientOperationHandler(EventHandler):
conn.answer(Packets.AnswerCheckCurrentSerial(err.tid)) conn.answer(Packets.AnswerCheckCurrentSerial(err.tid))
except DelayedError: except DelayedError:
# locked by a previous transaction, retry later # locked by a previous transaction, retry later
try: self.app.queueEvent(self._askCheckCurrentSerial, conn,
self.app.queueEvent(self._askCheckCurrentSerial, conn, (ttid, (ttid, serial, oid, request_time))
serial, oid, request_time), key=(oid, ttid))
except AlreadyPendingError:
conn.answer(Errors.AlreadyPending(dump(oid)))
except NotRegisteredError: except NotRegisteredError:
# transaction was aborted, cancel this event # transaction was aborted, cancel this event
logging.info('Forget serial check of %s:%s by %s delayed by %s', logging.info('Forget serial check of %s:%s by %s delayed by %s',
......
...@@ -74,12 +74,6 @@ class Transaction(object): ...@@ -74,12 +74,6 @@ class Transaction(object):
assert oid not in self.checked_set, dump(oid) assert oid not in self.checked_set, dump(oid)
self.store_dict[oid] = oid, data_id, value_serial self.store_dict[oid] = oid, data_id, value_serial
def cancel(self, oid):
try:
return self.store_dict.pop(oid)[1]
except KeyError:
self.checked_set.remove(oid)
class TransactionManager(object): class TransactionManager(object):
""" """
...@@ -178,7 +172,7 @@ class TransactionManager(object): ...@@ -178,7 +172,7 @@ class TransactionManager(object):
def getLockingTID(self, oid): def getLockingTID(self, oid):
return self._store_lock_dict.get(oid) return self._store_lock_dict.get(oid)
def lockObject(self, ttid, serial, oid, unlock=False): def lockObject(self, ttid, serial, oid):
""" """
Take a write lock on given object, checking that "serial" is Take a write lock on given object, checking that "serial" is
current. current.
...@@ -188,19 +182,6 @@ class TransactionManager(object): ...@@ -188,19 +182,6 @@ class TransactionManager(object):
""" """
# check if the object if locked # check if the object if locked
locking_tid = self._store_lock_dict.get(oid) locking_tid = self._store_lock_dict.get(oid)
if locking_tid == ttid and unlock:
logging.info('Deadlock resolution on %r:%r', dump(oid), dump(ttid))
# A duplicate store means client is resolving a deadlock, so
# drop the lock it held on this object, and drop object data for
# consistency.
del self._store_lock_dict[oid]
data_id = self._transaction_dict[ttid].cancel(oid)
if data_id:
self._app.dm.pruneData((data_id,))
# Give a chance to pending events to take that lock now.
self._app.executeQueuedEvents()
# Attemp to acquire lock again.
locking_tid = self._store_lock_dict.get(oid)
if locking_tid is None: if locking_tid is None:
previous_serial = None previous_serial = None
elif locking_tid == ttid: elif locking_tid == ttid:
...@@ -250,11 +231,11 @@ class TransactionManager(object): ...@@ -250,11 +231,11 @@ class TransactionManager(object):
transaction = self._transaction_dict[ttid] transaction = self._transaction_dict[ttid]
except KeyError: except KeyError:
raise NotRegisteredError raise NotRegisteredError
self.lockObject(ttid, serial, oid, unlock=True) self.lockObject(ttid, serial, oid)
transaction.check(oid) transaction.check(oid)
def storeObject(self, ttid, serial, oid, compression, checksum, data, def storeObject(self, ttid, serial, oid, compression, checksum, data,
value_serial, unlock=False): value_serial):
""" """
Store an object received from client node Store an object received from client node
""" """
...@@ -262,7 +243,7 @@ class TransactionManager(object): ...@@ -262,7 +243,7 @@ class TransactionManager(object):
transaction = self._transaction_dict[ttid] transaction = self._transaction_dict[ttid]
except KeyError: except KeyError:
raise NotRegisteredError raise NotRegisteredError
self.lockObject(ttid, serial, oid, unlock=unlock) self.lockObject(ttid, serial, oid)
# store object # store object
if data is None: if data is None:
data_id = None data_id = None
......
...@@ -19,9 +19,7 @@ from ..mock import Mock ...@@ -19,9 +19,7 @@ from ..mock import Mock
from .. import NeoUnitTestBase from .. import NeoUnitTestBase
from neo.storage.app import Application from neo.storage.app import Application
from neo.lib.protocol import CellStates from neo.lib.protocol import CellStates
from collections import deque
from neo.lib.pt import PartitionTable from neo.lib.pt import PartitionTable
from neo.storage.exception import AlreadyPendingError
class StorageAppTests(NeoUnitTestBase): class StorageAppTests(NeoUnitTestBase):
...@@ -31,8 +29,6 @@ class StorageAppTests(NeoUnitTestBase): ...@@ -31,8 +29,6 @@ class StorageAppTests(NeoUnitTestBase):
# create an application object # create an application object
config = self.getStorageConfiguration(master_number=1) config = self.getStorageConfiguration(master_number=1)
self.app = Application(config) self.app = Application(config)
self.app.event_queue = deque()
self.app.event_queue_dict = {}
def _tearDown(self, success): def _tearDown(self, success):
self.app.close() self.app.close()
...@@ -121,26 +117,6 @@ class StorageAppTests(NeoUnitTestBase): ...@@ -121,26 +117,6 @@ class StorageAppTests(NeoUnitTestBase):
self.assertTrue(cell_list[0].getUUID() in (master_uuid, storage_uuid)) self.assertTrue(cell_list[0].getUUID() in (master_uuid, storage_uuid))
self.assertTrue(cell_list[1].getUUID() in (master_uuid, storage_uuid)) self.assertTrue(cell_list[1].getUUID() in (master_uuid, storage_uuid))
def test_02_queueEvent(self):
self.assertEqual(len(self.app.event_queue), 0)
msg_id = 1325136
event = Mock({'__repr__': 'event'})
conn = Mock({'__repr__': 'conn', 'getPeerId': msg_id})
key = 'foo'
self.app.queueEvent(event, conn, ("test", ), key=key)
self.assertEqual(len(self.app.event_queue), 1)
_key, _event, _msg_id, _conn, args = self.app.event_queue[0]
self.assertEqual(key, _key)
self.assertEqual(msg_id, _msg_id)
self.assertEqual(len(args), 1)
self.assertEqual(args[0], "test")
self.assertRaises(AlreadyPendingError, self.app.queueEvent, event,
conn, ("test2", ), key=key)
self.assertEqual(len(self.app.event_queue), 1)
self.app.queueEvent(event, conn, ("test3", ), key=key,
raise_on_duplicate=False)
self.assertEqual(len(self.app.event_queue), 2)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment