Commit 7099fd12 authored by Kirill Smelkov's avatar Kirill Smelkov

Fix loadBefore vs GC

TemporaryStorage uses

    ._index   {} oid -> serial
    ._opickle {} oid -> data

as data store for load. However for loadBefore

    ._conflict_cache  {} (oid,serial) -> (data, time)

is reused as data store.

That would be ok if that place would be treated as data store, but given
its primary purpose - as its name suggests - was originally to be a
cache to resolve conflicts, it is "logical" that entries in this cache
are garbage-collected when entry age becomes > gc threshold.

Only now there is a problem: if an object is committed once, and time
passes, corresponding entry in ._conflict_cache will be removed. And this
would manifest itself as

    - load(oid)  -> gives latest data for the object (obtained via ._index and ._opickle)

but

    - loadBefore(oid, @head)  -> gives POSKeyError

-> Fix it by always preserving latest object revision in
._conflict_cache from being removed on GC.

The fix is important for systems that use ZODB5, or ZODB4-wc2[1] because
there ZODB.Connection switched from primarily using load to exclusively
using loadBefore.

/cc @icemac @mauritsvanrees @mgedmin @d-maurer @dwt @hannosch

Fixes: https://github.com/zopefoundation/tempstorage/issues/8
[1] nexedi/ZODB@8e7eab33
parent 7670236c
......@@ -124,11 +124,20 @@ and https://github.com/zopefoundation/tempstorage
def _clear_temp(self):
now = time.time()
if now > (self._last_cache_gc + self._conflict_cache_gcevery):
temp_cc = self._conflict_cache.copy()
for k, v in temp_cc.items():
data, t = v
# build {} oid -> [](serial, data, t)
byoid = {}
for ((oid,serial), (data,t)) in self._conflict_cache.items():
hist = byoid.setdefault(oid, [])
hist.append((serial, data, t))
# gc entries but keep latest record for each oid
for oid, hist in byoid.items():
hist.sort(key=lambda _: _[0]) # by serial
hist = hist[:-1] # without latest record
for serial, data, t in hist:
if now > (t + self._conflict_cache_maxage):
del self._conflict_cache[k]
del self._conflict_cache[(oid,serial)]
self._last_cache_gc = now
self._tmp = []
......
......@@ -180,25 +180,45 @@ class TemporaryStorageTests(unittest.TestCase):
storage._conflict_cache_gcevery = 1 # second
storage._conflict_cache_maxage = 1 # second
oid = storage.new_oid()
self._dostore(storage, oid, data=MinPO(5))
# assertCacheKeys asserts that set(storage._conflict_cache.keys()) == oidrevSet
# storage._conflict_cache is organized as {} (oid,rev) -> (data,t) and
# so is used by loadBefore as data storage. It is important that latest
# revision of an object is not garbage-collected so that loadBefore
# does not loose what was last committed.
def assertCacheKeys(*voidrevOK):
oidrevOK = set(voidrevOK)
self.assertEqual(set(storage._conflict_cache.keys()), oidrevOK)
oid1 = storage.new_oid()
self._dostore(storage, oid1, data=MinPO(5))
rev11 = storage.lastTransaction()
self._dostore(storage, oid1, revid=rev11, data=MinPO(7))
rev12 = storage.lastTransaction()
time.sleep(2)
oid2 = storage.new_oid()
self._dostore(storage, oid2, data=MinPO(10))
rev21 = storage.lastTransaction()
oid3 = storage.new_oid()
self._dostore(storage, oid3, data=MinPO(9))
rev31 = storage.lastTransaction()
# (oid1, rev11) garbage-collected
assertCacheKeys((oid1, rev12), (oid2, rev21), (oid3, rev31))
self.assertEqual(len(storage._conflict_cache), 2)
self._dostore(storage, oid2, revid=rev21, data=MinPO(11))
rev22 = storage.lastTransaction()
time.sleep(2)
oid4 = storage.new_oid()
self._dostore(storage, oid4, data=MinPO(11))
rev41 = storage.lastTransaction()
self.assertEqual(len(storage._conflict_cache), 1)
# (oid2, rev21) garbage-collected
assertCacheKeys((oid1, rev12), (oid2, rev22), (oid3, rev31), (oid4, rev41))
def test_have_MVCC_ergo_no_ReadConflict(self):
from ZODB.DB import DB
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment