Commit dec81519 authored by Julien Muchembled

master: last tid/oid after recovery/verification

The important bugfix is to update the last oid when the master verifies a
transaction with new oids.

By resetting the transaction manager at the beginning of the recovery phase,
it becomes possible to avoid tid/oid holes:
- by reallocating oids that were previously allocated but never used
- when going back "in the past", i.e. reverting to an older version of the
  database (with fewer oids) and/or adjusting the clock
parent e1f9a7da
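
To picture the hole-avoidance, here is a toy model of the master's oid allocator (a sketch only: `ToyTransactionManager` is hypothetical, and the max-keeping `setLastOID` semantics are assumed from the diff below, not copied from NEO):

    from struct import pack, unpack

    ZERO_OID = '\0' * 8

    class ToyTransactionManager(object):
        """Toy model of the master's oid allocator (hypothetical)."""

        def reset(self):
            # Start of recovery: forget allocations that were never stored.
            self._last_oid = ZERO_OID

        def setLastOID(self, oid):
            # Answers from storage nodes only move the allocator forward.
            if self._last_oid < oid:
                self._last_oid = oid

        def getNextOIDList(self, num_oids):
            oid = unpack('!Q', self._last_oid)[0] + 1
            oid_list = [pack('!Q', oid + i) for i in xrange(num_oids)]
            self._last_oid = oid_list[-1]
            return oid_list

    tm = ToyTransactionManager()
    tm.reset()
    tm.getNextOIDList(10)    # oids 1..10 handed out but never committed
    tm.reset()               # master goes through recovery again
    tm.setLastOID(ZERO_OID)  # storages report an empty database
    assert tm.getNextOIDList(1) == [pack('!Q', 1)]  # oid 1 reallocated, no hole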
@@ -264,8 +264,6 @@ class Application(BaseApplication):
         """
         logging.info('provide service')
         poll = self.em.poll
-        self.tm.reset()
         self.changeClusterState(ClusterStates.RUNNING)
         # Now everything is passive.
...
@@ -17,7 +17,6 @@
 from neo.lib import logging
 from neo.lib.util import dump
 from neo.lib.protocol import Packets, ProtocolError, ClusterStates, NodeStates
-from neo.lib.protocol import ZERO_OID
 from .handlers import MasterHandler
@@ -49,6 +48,7 @@ class RecoveryManager(MasterHandler):
         """
         logging.info('begin the recovery of the status')
         app = self.app
+        app.tm.reset()
         pt = app.pt
         app.changeClusterState(ClusterStates.RECOVERING)
         pt.clear()
@@ -88,8 +88,6 @@ class RecoveryManager(MasterHandler):
         if pt.getID() is None:
             logging.info('creating a new partition table')
-            # reset IDs generators & build new partition with running nodes
-            app.tm.setLastOID(ZERO_OID)
             pt.make(node_list)
             self._notifyAdmins(Packets.SendPartitionTable(
                 pt.getID(), pt.getRowList()))
@@ -102,7 +100,6 @@ class RecoveryManager(MasterHandler):
         pt.setBackupTidDict(self.backup_tid_dict)
         app.backup_tid = pt.getBackupTid()
-        app.setLastTransaction(app.tm.getLastTID())
         logging.debug('cluster starts with loid=%s and this partition table :',
             dump(app.tm.getLastOID()))
         pt.log()
@@ -121,11 +118,9 @@ class RecoveryManager(MasterHandler):
             conn.ask(Packets.AskLastIDs())

     def answerLastIDs(self, conn, loid, ltid, lptid, backup_tid):
-        # Get max values.
-        if loid is not None:
-            self.app.tm.setLastOID(loid)
-        if ltid is not None:
-            self.app.tm.setLastTID(ltid)
+        tm = self.app.tm
+        tm.setLastOID(loid)
+        tm.setLastTID(ltid)
         if lptid > self.target_ptid:
             # something newer
             self.target_ptid = lptid
...
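
A side note on the simplified answerLastIDs: the old code guarded against loid/ltid being None (a storage with an empty database answers nothing). In Python 2, None compares less than any string, so once reset() seeds the generators with ZERO_OID/ZERO_TID, a max-keeping setter ignores None on its own — assuming setLastOID behaves as sketched above:

    ZERO_OID = '\0' * 8

    last_oid = ZERO_OID
    # None sorts before any str in Python 2, so it can never win the max.
    for loid in (None, '\x00' * 7 + '\x05', None):
        if last_oid < loid:
            last_oid = loid
    assert last_oid == '\x00' * 7 + '\x05'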
@@ -17,7 +17,7 @@
 from time import time
 from struct import pack, unpack
 from neo.lib import logging
-from neo.lib.protocol import ProtocolError, uuid_str, ZERO_TID
+from neo.lib.protocol import ProtocolError, uuid_str, ZERO_OID, ZERO_TID
 from neo.lib.util import dump, u64, addTID, tidFromTime

 class DelayedError(Exception):
@@ -155,15 +155,18 @@ class TransactionManager(object):
     """
     Manage current transactions
     """
-    _last_tid = ZERO_TID

     def __init__(self, on_commit):
+        self._on_commit = on_commit
+        self.reset()
+
+    def reset(self):
         # ttid -> transaction
         self._ttid_dict = {}
         # node -> transactions mapping
         self._node_dict = {}
-        self._last_oid = None
-        self._on_commit = on_commit
+        self._last_oid = ZERO_OID
+        self._last_tid = ZERO_TID
         # queue filled with ttids pointing to transactions with increasing tids
         self._queue = []
@@ -182,8 +185,6 @@ class TransactionManager(object):
     def getNextOIDList(self, num_oids):
         """ Generate a new OID list """
-        if self._last_oid is None:
-            raise RuntimeError, 'I do not know the last OID'
         oid = unpack('!Q', self._last_oid)[0] + 1
         oid_list = [pack('!Q', oid + i) for i in xrange(num_oids)]
         self._last_oid = oid_list[-1]
@@ -249,14 +250,6 @@ class TransactionManager(object):
         if self._last_tid < tid:
             self._last_tid = tid

-    def reset(self):
-        """
-        Discard all manager content
-        This doesn't reset the last TID.
-        """
-        self._ttid_dict = {}
-        self._node_dict = {}
-
     def hasPending(self):
         """
         Returns True if some transactions are pending
...
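
For reference, oids and tids are 8-byte big-endian unsigned integers, which is why getNextOIDList can do arithmetic through pack/unpack and why a max-keeping setter can compare packed strings with a plain `<` (a standalone check of the encoding, not NEO code):

    from struct import pack, unpack

    ZERO_OID = '\0' * 8
    # After reset(), _last_oid is ZERO_OID, so the first list starts at oid 1.
    oid = unpack('!Q', ZERO_OID)[0] + 1
    oid_list = [pack('!Q', oid + i) for i in xrange(3)]
    assert [unpack('!Q', o)[0] for o in oid_list] == [1, 2, 3]
    # Lexicographic order of packed big-endian values matches numeric order.
    assert ZERO_OID < oid_list[0] < oid_list[1] < oid_list[2]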
@@ -61,6 +61,7 @@ class VerificationManager(BaseServiceHandler):
         app.changeClusterState(ClusterStates.VERIFYING)
         if not app.backup_tid:
             self.verifyData()
+        app.setLastTransaction(app.tm.getLastTID())

     def verifyData(self):
         app = self.app
@@ -96,18 +97,33 @@ class VerificationManager(BaseServiceHandler):
         # Finish all transactions for which we know that tpc_finish was called
         # but not fully processed. This may include replicas with transactions
         # that were not even locked.
+        all_set = set()
         for ttid, tid in self._locked_dict.iteritems():
             uuid_set = self._voted_dict.get(ttid)
             if uuid_set:
+                all_set |= uuid_set
                 packet = Packets.ValidateTransaction(ttid, tid)
                 for node in getIdentifiedList(pool_set=uuid_set):
                     node.notify(packet)
-                if app.getLastTransaction() < tid:
-                    app.setLastTransaction(tid)
-                    app.tm.setLastTID(tid)
-        # If possible, send the packets now.
-        app.em.poll(0)
+        # XXX: refactoring needed
+        # Ask last oid/tid again for nodes that recover locked transactions.
+        # In fact, this is mainly for the last oid since the last tid can be
+        # deduced from max(self._locked_dict.values()).
+        # If getLastIDs is not always instantaneous for some backends, we
+        # should split AskLastIDs to not ask the last oid/tid at the end of
+        # recovery phase (and instead ask all nodes once, here).
+        # With this request, we also prefer to make sure all nodes validate
+        # successfully before switching to RUNNING state.
+        self._askStorageNodesAndWait(Packets.AskLastIDs(),
+            getIdentifiedList(all_set))
+
+    def answerLastIDs(self, conn, loid, ltid, lptid, backup_tid):
+        self._uuid_set.remove(conn.getUUID())
+        tm = self.app.tm
+        tm.setLastOID(loid)
+        tm.setLastTID(ltid)
+        ptid = self.app.pt.getID()
+        assert lptid < ptid if None != lptid != ptid else not backup_tid

     def answerLockedTransactions(self, conn, tid_dict):
         uuid = conn.getUUID()
...
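
The new one-line assert packs a chained comparison into a conditional expression; unfolded, and with hedged comments on what the invariant appears to be, it reads:

    # Equivalent to:
    # assert lptid < ptid if None != lptid != ptid else not backup_tid
    if lptid is not None and lptid != ptid:
        # A node reporting a different partition table id must have an
        # older one than the table the master just loaded or rebuilt.
        assert lptid < ptid
    else:
        # Same ptid (or none at all): presumably no backup_tid is
        # expected from such a node.
        assert not backup_tid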
@@ -112,19 +112,6 @@ class testTransactionManager(NeoUnitTestBase):
         # ...and the lock is available
         txnman.begin(client, self.getNextTID())

-    def test_getNextOIDList(self):
-        txnman = TransactionManager(lambda tid, txn: None)
-        # must raise as we don't have one
-        self.assertEqual(txnman.getLastOID(), None)
-        self.assertRaises(RuntimeError, txnman.getNextOIDList, 1)
-        # ask list
-        txnman.setLastOID(self.getOID(1))
-        oid_list = txnman.getNextOIDList(15)
-        self.assertEqual(len(oid_list), 15)
-        # begin from 1, so generated oid from 2 to 16
-        for i, oid in zip(xrange(len(oid_list)), oid_list):
-            self.assertEqual(oid, self.getOID(i+2))
-
     def test_forget(self):
         client1 = Mock({'__hash__': 1})
         client2 = Mock({'__hash__': 2})
...
@@ -948,8 +948,12 @@ class Test(NEOThreadedTest):
                 raise _UnexpectedSuccess
             except ConnectionClosed, e:
                 e = type(e), None, None
+            # Also check that the master reset the last oid to a correct value.
+            self.assertTrue(cluster.client.new_oid_list)
             t.begin()
-            self.assertIn('x', c.root())
+            self.assertEqual(1, u64(c.root()['x']._p_oid))
+            self.assertFalse(cluster.client.new_oid_list)
+            self.assertEqual(2, u64(cluster.client.new_oid()))
         finally:
             cluster.stop()
         raise _ExpectedFailure(e)
...
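
In plain terms, the strengthened test pins down the no-hole guarantee end to end (u64 below mirrors the 8-byte big-endian convention of neo.lib.util, an assumption for illustration):

    from struct import unpack

    def u64(s):
        # assumed equivalent of neo.lib.util.u64
        return unpack('!Q', s)[0]

    # root['x'] keeps oid 1 across the failed commit and master restart...
    assert u64('\x00' * 7 + '\x01') == 1
    # ...and the very next oid the client allocates is 2: no hole.
    assert u64('\x00' * 7 + '\x02') == 2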