Sync with NEO/py v1.12-13-gf2ea4be2 (oldproto branch)

* origin/old-proto:
  qa: skip broken ZODB test
  client: fix race with invalidations when starting a new transaction on ZODB 5
  Code clean-up, comment fixes
  master: fix crash in STARTING_BACKUP when connecting to an upstream secondary master
  mysql: workaround for MDEV-20693
  client: inline Application._loadFromCache
  client: replace global load lock by a per-oid one
  client: unindent code
  client: remove load lock in tpc_finish
  qa: check cache in testExternalInvalidation
  qa: comment testExternalInvalidation2

Sync with NEO/py v1.12-13-gf2ea4be2 (oldproto branch)
* origin/old-proto:
  qa: skip broken ZODB test
  client: fix race with invalidations when starting a new transaction on ZODB 5
  Code clean-up, comment fixes
  master: fix crash in STARTING_BACKUP when connecting to an upstream secondary master
  mysql: workaround for MDEV-20693
  client: inline Application._loadFromCache
  client: replace global load lock by a per-oid one
  client: unindent code
  client: remove load lock in tpc_finish
  qa: check cache in testExternalInvalidation
  qa: comment testExternalInvalidation2
3a6a3475 · Kirill Smelkov · a4bf053e · f2ea4be2 · 3a6a3475 · 3a6a3475
Commit 3a6a3475 authored Nov 01, 2020 by Kirill Smelkov
13 changed files
--- a/neo/client/app.py
+++ b/neo/client/app.py
@@ -17,7 +17,7 @@
 import heapq
 import random
 import time
-
+from collections import defaultdict
 try:
    from ZODB._compat import dumps, loads, _protocol
 except ImportError:
@@ -79,7 +79,7 @@ class Application(ThreadedApplication):
        # no self-assigned NID, primary master will supply us one
        self._cache = ClientCache() if cache_size is None else \
                      ClientCache(max_size=cache_size)
-        self._loading_oid = None
+        self._loading = defaultdict(lambda: (Lock(), []))
        self.new_oid_list = ()
        self.last_oid = '\0' * 8
        self.storage_event_handler = storage.StorageEventHandler(self)
@@ -90,19 +90,13 @@ class Application(ThreadedApplication):
        self.notifications_handler = master.PrimaryNotificationsHandler( self)
        self._txn_container = TransactionContainer()
        # Lock definition :
-        # _load_lock is used to make loading and storing atomic
-        lock = Lock()
-        self._load_lock_acquire = lock.acquire
-        self._load_lock_release = lock.release
        # _oid_lock is used in order to not call multiple oid
        # generation at the same time
        lock = Lock()
        self._oid_lock_acquire = lock.acquire
        self._oid_lock_release = lock.release
-        lock = Lock()
        # _cache_lock is used for the client cache
-        self._cache_lock_acquire = lock.acquire
-        self._cache_lock_release = lock.release
+        self._cache_lock = Lock()
        # _connecting_to_master_node is used to prevent simultaneous master
        # node connection attempts
        self._connecting_to_master_node = Lock()
@@ -397,21 +391,32 @@ class Application(ThreadedApplication):
        """
        # TODO:
        # - rename parameters (here? and in handlers & packet definitions)
-
-        acquire = self._cache_lock_acquire
-        release = self._cache_lock_release
-        # XXX: Consider using a more fine-grained lock.
-        self._load_lock_acquire()
+        acquired = False
+        lock = self._cache_lock
        try:
-            acquire()
-            try:
-                result = self._loadFromCache(oid, tid, before_tid)
-                if result:
-                    return result
-                self._loading_oid = oid
-                self._loading_invalidated = []
-            finally:
-                release()
+            while 1:
+                with lock:
+                    if tid:
+                        result = self._cache.load(oid, tid + '*')
+                        assert not result or result[1] == tid
+                    else:
+                        result = self._cache.load(oid, before_tid)
+                    if result:
+                        return result
+                    load_lock = self._loading[oid][0]
+                    acquired = load_lock.acquire(0)
+                # Several concurrent cache misses for the same oid are probably
+                # for the same tid so we use a per-oid lock to avoid asking the
+                # storage node for the same data more than once.
+                if acquired:
+                    # The first thread does load from storage,
+                    # and fills cache with the response.
+                    break
+                # The other threads wait for the first one to complete and
+                # loop, possibly resulting in a new cache miss if a different
+                # tid is actually wanted or if the data was too big.
+                with load_lock:
+                    pass
            # While the cache lock is released, an arbitrary number of
            # invalidations may be processed, for this oid or not. And at this
            # precise moment, if both tid and before_tid are None (which is
@@ -427,20 +432,24 @@ class Application(ThreadedApplication):
                # we got from master.
                before_tid = p64(u64(self.last_tid) + 1)
            data, tid, next_tid, _ = self._loadFromStorage(oid, tid, before_tid)
-            acquire()
-            try:
-                if self._loading_oid:
+            with lock:
+                loading = self._loading.pop(oid, None)
+                if loading:
+                    assert loading[0] is load_lock
                    if not next_tid:
-                        for t in self._loading_invalidated:
+                        for t in loading[1]:
                            if tid < t:
                                next_tid = t
                                break
                    self._cache.store(oid, data, tid, next_tid)
                # Else, we just reconnected to the master.
-            finally:
-                release()
-        finally:
-            self._load_lock_release()
+                load_lock.release()
+        except:
+            if acquired:
+                with lock:
+                    self._loading.pop(oid, None)
+                    load_lock.release()
+            raise
        return data, tid, next_tid

    def _loadFromStorage(self, oid, at_tid, before_tid):
@@ -459,16 +468,6 @@ class Application(ThreadedApplication):
            Packets.AskObject(oid, at_tid, before_tid),
            askStorage)

-    def _loadFromCache(self, oid, at_tid=None, before_tid=None):
-        """
-        Load from local cache, return None if not found.
-        """
-        if at_tid:
-            result = self._cache.load(oid, at_tid + '*')
-            assert not result or result[1] == at_tid
-            return result
-        return self._cache.load(oid, before_tid)
-
    def tpc_begin(self, storage, transaction, tid=None, status=' '):
        """Begin a new transaction."""
        # First get a transaction, only one is allowed at a time
@@ -670,7 +669,7 @@ class Application(ThreadedApplication):
        txn_context = self._txn_container.pop(transaction)
        if txn_context is None:
            return
-        # We want that the involved nodes abort a transaction after any
+        # We want the involved nodes to abort a transaction after any
        # other packet sent by the client for this transaction. IOW, if we
        # already have a connection with a storage node, potentially with
        # a pending write, aborting only via the master may lead to a race
@@ -699,9 +698,8 @@ class Application(ThreadedApplication):
                                                txn_context.conn_dict))
            except ConnectionClosed:
                pass
-        # We don't need to flush queue, as it won't be reused by future
-        # transactions (deleted on next line & indexed by transaction object
-        # instance).
+        # No need to flush queue, as it will be destroyed on return,
+        # along with txn_context.
        self.dispatcher.forget_queue(txn_context.queue, flush_queue=False)

    def tpc_finish(self, transaction, f=None):
@@ -724,28 +722,22 @@ class Application(ThreadedApplication):
        txn_container = self._txn_container
        if not txn_container.get(transaction).voted:
            self.tpc_vote(transaction)
-        checked_list = []
-        self._load_lock_acquire()
+        txn_context = txn_container.pop(transaction)
+        cache_dict = txn_context.cache_dict
+        checked_list = [oid for oid, data  in cache_dict.iteritems()
+                            if data is CHECKED_SERIAL]
+        for oid in checked_list:
+            del cache_dict[oid]
+        ttid = txn_context.ttid
+        p = Packets.AskFinishTransaction(ttid, cache_dict, checked_list)
        try:
-            # Call finish on master
-            txn_context = txn_container.pop(transaction)
-            cache_dict = txn_context.cache_dict
-            checked_list = [oid for oid, data  in cache_dict.iteritems()
-                                if data is CHECKED_SERIAL]
-            for oid in checked_list:
-                del cache_dict[oid]
-            ttid = txn_context.ttid
-            p = Packets.AskFinishTransaction(ttid, cache_dict, checked_list)
-            try:
-                tid = self._askPrimary(p, cache_dict=cache_dict, callback=f)
-                assert tid
-            except ConnectionClosed:
-                tid = self._getFinalTID(ttid)
-                if not tid:
-                    raise
-            return tid
-        finally:
-            self._load_lock_release()
+            tid = self._askPrimary(p, cache_dict=cache_dict, callback=f)
+            assert tid
+        except ConnectionClosed:
+            tid = self._getFinalTID(ttid)
+            if not tid:
+                raise
+        return tid

    def _getFinalTID(self, ttid):
        try:
@@ -991,11 +983,8 @@ class Application(ThreadedApplication):
        # It should not be otherwise required (clients should be free to load
        # old data as long as it is available in cache, even if it was pruned
        # by a pack), so don't bother invalidating on other clients.
-        self._cache_lock_acquire()
-        try:
+        with self._cache_lock:
            self._cache.clear()
-        finally:
-            self._cache_lock_release()

    def getLastTID(self, oid):
        return self.load(oid)[1]

--- a/neo/client/handlers/master.py
+++ b/neo/client/handlers/master.py
+# -*- coding: utf-8 -*-
 #
 # Copyright (C) 2006-2019  Nexedi SA
 #
@@ -45,8 +46,7 @@ class PrimaryNotificationsHandler(MTEventHandler):
            # Either we're connecting or we already know the last tid
            # via invalidations.
            assert app.master_conn is None, app.master_conn
-            app._cache_lock_acquire()
-            try:
+            with app._cache_lock:
                if app_last_tid < ltid:
                    app._cache.clear_current()
                    # In the past, we tried not to invalidate the
@@ -60,9 +60,7 @@ class PrimaryNotificationsHandler(MTEventHandler):
                    app._cache.clear()
                # Make sure a parallel load won't refill the cache
                # with garbage.
-                app._loading_oid = app._loading_invalidated = None
-            finally:
-                app._cache_lock_release()
+                app._loading.clear()
            db = app.getDB()
            db is None or db.invalidateCache()
            app.last_tid = ltid
@@ -70,21 +68,22 @@ class PrimaryNotificationsHandler(MTEventHandler):

    def answerTransactionFinished(self, conn, _, tid, callback, cache_dict):
        app = self.app
-        app.last_tid = tid
-        # Update cache
        cache = app._cache
-        app._cache_lock_acquire()
-        try:
+        invalidate = cache.invalidate
+        loading_get = app._loading.get
+        with app._cache_lock:
            for oid, data in cache_dict.iteritems():
                # Update ex-latest value in cache
-                cache.invalidate(oid, tid)
+                invalidate(oid, tid)
+                loading = loading_get(oid)
+                if loading:
+                    loading[1].append(tid)
                if data is not None:
                    # Store in cache with no next_tid
                    cache.store(oid, data, tid, None)
            if callback is not None:
                callback(tid)
-        finally:
-            app._cache_lock_release()
+            app.last_tid = tid # see comment in invalidateObjects

    def connectionClosed(self, conn):
        app = self.app
@@ -112,20 +111,24 @@ class PrimaryNotificationsHandler(MTEventHandler):
        app = self.app
        if app.ignore_invalidations:
            return
-        app.last_tid = tid
-        app._cache_lock_acquire()
-        try:
+        with app._cache_lock:
            invalidate = app._cache.invalidate
-            loading = app._loading_oid
+            loading_get = app._loading.get
            for oid in oid_list:
                invalidate(oid, tid)
-                if oid == loading:
-                    app._loading_invalidated.append(tid)
+                loading = loading_get(oid)
+                if loading:
+                    loading[1].append(tid)
            db = app.getDB()
            if db is not None:
                db.invalidate(tid, oid_list)
-        finally:
-            app._cache_lock_release()
+            # ZODB<5: Update before releasing the lock so that app.load
+            #         asks the last serial (with respect to already processed
+            #         invalidations by Connection._setstate).
+            # ZODB≥5: Update after db.invalidate because the MVCC
+            #         adapter starts at the greatest TID between
+            #         IStorage.lastTransaction and processed invalidations.
+            app.last_tid = tid

    def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
        pt = self.app.pt = object.__new__(PartitionTable)

--- a/neo/client/transactions.py
+++ b/neo/client/transactions.py
@@ -50,7 +50,7 @@ class Transaction(object):
        self.conflict_dict = {}             # {oid: serial}
        # resolved conflicts
        self.resolved_dict = {}             # {oid: serial}
-        # involved storage nodes; connection is None is connection was lost
+        # involved storage nodes; connection is None if connection was lost
        self.conn_dict = {}                 # {node_id: connection}

    def __repr__(self):

--- a/neo/debug.py
+++ b/neo/debug.py
@@ -197,8 +197,7 @@ elif IF == 'trace-cache':

    @defer
    def profile(app):
-        app._cache_lock_acquire()
-        try:
+        with app._cache_lock:
            cache = app._cache
            if type(cache) is ClientCache:
                app._cache = CacheTracer(cache, '%s-%s.neo-cache-trace' %
@@ -206,5 +205,3 @@ elif IF == 'trace-cache':
                app._cache.clear()
            else:
                app._cache = cache.close()
-        finally:
-            app._cache_lock_release()
--- a/neo/master/app.py
+++ b/neo/master/app.py
@@ -588,8 +588,8 @@ class Application(BaseApplication):
        node.send(Packets.StartOperation(self.backup_tid))
        uuid = node.getUUID()
        assert uuid not in self.storage_starting_set
-        if uuid not in self.storage_ready_dict:
-            self.storage_starting_set.add(uuid)
+        assert uuid not in self.storage_ready_dict
+        self.storage_starting_set.add(uuid)

    def setStorageReady(self, uuid):
        self.storage_starting_set.remove(uuid)

--- a/neo/master/backup_app.py
+++ b/neo/master/backup_app.py
@@ -66,6 +66,7 @@ There is no conflict of node id between the 2 clusters:
 class BackupApplication(object):

    pt = None
+    server = None # like in BaseApplication
    uuid = None

    def __init__(self, app, name, master_addresses):

--- a/neo/storage/database/mysqldb.py
+++ b/neo/storage/database/mysqldb.py
@@ -781,11 +781,19 @@ class MySQLDatabaseManager(DatabaseManager):
        if max_tid is not None:
            sql += " AND tid <= %d" % max_tid
        q = self.query
-        q("DELETE FROM trans" + sql)
+        if q("SELECT 1 FROM trans%s LIMIT 1" % sql):
+            q("DELETE FROM trans" + sql)
+        else:
+            logging.info("Nothing to truncate in trans for partition %s",
+                         partition)
        sql = " FROM obj" + sql
        data_id_list = [x for x, in q(
            "SELECT DISTINCT data_id%s AND data_id IS NOT NULL" % sql)]
-        q("DELETE" + sql)
+        if q("SELECT 1%s LIMIT 1" % sql):
+            q("DELETE" + sql)
+        else:
+            logging.info("Nothing to truncate in obj for partition %s",
+                         partition)
        self._pruneData(data_id_list)

    def getTransaction(self, tid, all = False):

--- a/neo/storage/handlers/client.py
+++ b/neo/storage/handlers/client.py
@@ -34,7 +34,7 @@ class ClientOperationHandler(BaseHandler):
        app = self.app
        if app.operational:
            # Even if in most cases, abortFor is called from both this method
-            # and BaseMasterHandler.notifyPartitionChanges (especially since
+            # and BaseMasterHandler.notifyNodeInformation (especially since
            # storage nodes disconnect unknown clients on their own), these 2
            # handlers also cover distinct scenarios, so neither of them is
            # redundant:

--- a/neo/storage/transactions.py
+++ b/neo/storage/transactions.py
@@ -139,10 +139,11 @@ class TransactionManager(EventQueue):

    def replicating(self, offset_list):
        self._replicating.update(offset_list)
-        isdisjoint = set(offset_list).isdisjoint
-        assert isdisjoint(self._replicated), (offset_list, self._replicated)
-        assert isdisjoint(map(self.getPartition, self._store_lock_dict)), (
-            offset_list, self._store_lock_dict)
+        if __debug__:
+            isdisjoint = set(offset_list).isdisjoint
+            assert isdisjoint(self._replicated), (offset_list, self._replicated)
+            assert isdisjoint(map(self.getPartition, self._store_lock_dict)), (
+                offset_list, self._store_lock_dict)
        p = Packets.AskUnfinishedTransactions(offset_list)
        self._app.master_conn.ask(p, offset_list=offset_list)


--- a/neo/tests/threaded/__init__.py
+++ b/neo/tests/threaded/__init__.py
@@ -1120,8 +1120,7 @@ class NEOThreadedTest(NeoTestBase):

        def run(self):
            try:
-                apply(*self.__target)
-                self.__exc_info = None
+                self.__result = apply(*self.__target)
            except:
                self.__exc_info = sys.exc_info()
                if self.__exc_info[0] is NEOThreadedTest.failureException:
@@ -1129,10 +1128,13 @@ class NEOThreadedTest(NeoTestBase):

        def join(self, timeout=None):
            threading.Thread.join(self, timeout)
-            if not self.is_alive() and self.__exc_info:
-                etype, value, tb = self.__exc_info
-                del self.__exc_info
-                raise etype, value, tb
+            if not self.is_alive():
+                try:
+                    return self.__result
+                except AttributeError:
+                    etype, value, tb = self.__exc_info
+                    del self.__exc_info
+                    raise etype, value, tb

    class newThread(newPausedThread):


--- a/neo/tests/threaded/test.py
+++ b/neo/tests/threaded/test.py
--- a/neo/tests/threaded/testReplication.py
+++ b/neo/tests/threaded/testReplication.py
@@ -400,6 +400,22 @@ class ReplicationTests(NEOThreadedTest):
                self.tic()
                self.assertTrue(backup.master.is_alive())

+    @with_cluster(master_count=2)
+    def testBackupFromUpstreamWithSecondaryMaster(self, upstream):
+        """
+        Check that the backup master reacts correctly when connecting first
+        to a secondary master of the upstream cluster.
+        """
+        with NEOCluster(upstream=upstream) as backup:
+            primary = upstream.primary_master
+            m, = (m for m in upstream.master_list if m is not primary)
+            backup.master.resetNode(upstream_masters=[m.server])
+            backup.start()
+            backup.neoctl.setClusterState(ClusterStates.STARTING_BACKUP)
+            self.tic()
+            self.assertEqual(backup.neoctl.getClusterState(),
+                             ClusterStates.BACKINGUP)
+
    @backup_test()
    def testCreationUndone(self, backup):
        """

--- a/neo/tests/zodb/testBasic.py
+++ b/neo/tests/zodb/testBasic.py
@@ -39,6 +39,14 @@ class BasicTests(ZODBTestCase, StorageTestBase, BasicStorage):
        with Patch(threaded, TIC_LOOP=TIC_LOOP()):
            super(BasicTests, self).check_checkCurrentSerialInTransaction()

+    # The test expects that both load & lastTransaction would be blocked
+    # as long as the tpc_finish callback has not finished, taking more
+    # than .1 second. ZODB 5.6.0 clarified that lastTransaction() can
+    # return immediately with the previous last TID rather than blocking
+    # until it is allowed to return the new last TID.
+    check_tid_ordering_w_commit = unittest.skip("ZODB PR #316")(
+        BasicStorage.check_tid_ordering_w_commit)
+
 if __name__ == "__main__":
    suite = unittest.makeSuite(BasicTests, 'check')
    unittest.main(defaultTest='suite')