From bc066b9a5848b80a480e9055d924395db40e3dca Mon Sep 17 00:00:00 2001 From: Jim Fulton <jim@zope.com> Date: Mon, 20 Sep 2010 18:09:40 +0000 Subject: [PATCH] Bugs fixes: - If a ZEO client process was restarted while invalidating a ZEO cache entry, the cache could be left in a stage when there is data marked current that should be invalidated, leading to persistent conflict errors. - Corrupted or invalid cache files prevented ZEO clients from starting. Now, bad cache files are moved aside. - Invalidations of object records in ZEO caches, where the invalidation transaction ids matched the cached transaction ids should have been ignored. --- src/CHANGES.txt | 12 ++++++ src/ZEO/cache.py | 22 ++++++++++- src/ZEO/tests/test_cache.py | 77 +++++++++++++++++++++++++++++++++---- 3 files changed, 103 insertions(+), 8 deletions(-) diff --git a/src/CHANGES.txt b/src/CHANGES.txt index 0a99a352..7ddbebc6 100644 --- a/src/CHANGES.txt +++ b/src/CHANGES.txt @@ -13,6 +13,18 @@ Bugs Fixed (Thanks to Christian Zagrodnick for chasing this down.) +- If a ZEO client process was restarted while invalidating a ZEO cache + entry, the cache could be left in a stage when there is data marked + current that should be invalidated, leading to persistent conflict + errors. + +- Corrupted or invalid cache files prevented ZEO clients from + starting. Now, bad cache files are moved aside. + +- Invalidations of object records in ZEO caches, where the + invalidation transaction ids matched the cached transaction ids + should have been ignored. + - On Mac OS X, clients that connected and disconnected quickly could cause a ZEO server to stop accepting connections, due to a failure to catch errors in the initial part of the connection process. diff --git a/src/ZEO/cache.py b/src/ZEO/cache.py index cc2206be..be80bdaa 100644 --- a/src/ZEO/cache.py +++ b/src/ZEO/cache.py @@ -207,7 +207,24 @@ class ClientCache(object): self.f.write(magic+z64) logger.info("created temporary cache file %r", self.f.name) - self._initfile(fsize) + try: + self._initfile(fsize) + except: + if not path: + raise # unrecoverable temp file error :( + badpath = path+'.bad' + if os.path.exists(badpath): + logger.critical( + 'Removing bad cache file: %r (prev bad exists).', + path, exc_info=1) + os.remove(path) + else: + logger.critical('Moving bad cache file to %r.', + badpath, exc_info=1) + os.rename(path, badpath) + self.f = open(path, 'wb+') + self.f.write(magic+z64) + self._initfile(ZEC_HEADER_SIZE) # Statistics: _n_adds, _n_added_bytes, # _n_evicts, _n_evicted_bytes, @@ -672,6 +689,9 @@ class ClientCache(object): self._trace(0x1E, oid, tid) self._len -= 1 else: + if tid == saved_tid: + logger.warning("Ignoring invalidation with same tid as current") + return self.f.seek(ofs+21) self.f.write(tid) self._set_noncurrent(oid, saved_tid, ofs) diff --git a/src/ZEO/tests/test_cache.py b/src/ZEO/tests/test_cache.py index f625f5cb..1e129659 100644 --- a/src/ZEO/tests/test_cache.py +++ b/src/ZEO/tests/test_cache.py @@ -488,17 +488,80 @@ __test__ = dict( >>> logger.setLevel(logging.NOTSET) >>> logger.removeHandler(handler) + >>> logger.setLevel(logging.NOTSET) + >>> logger.removeHandler(handler) >>> cache.close() """, - bad_magic_number = - r""" - >>> open('cache', 'w').write("Hi world!") - >>> try: cache = ZEO.cache.ClientCache('cache', 1000) - ... except Exception, v: print v - unexpected magic number: 'Hi w' - """ ) +def invalidations_with_current_tid_dont_wreck_cache(): + """ + >>> cache = ZEO.cache.ClientCache('cache', 1000) + >>> cache.store(p64(1), p64(1), None, 'data') + >>> import logging, sys + >>> handler = logging.StreamHandler(sys.stdout) + >>> logging.getLogger().addHandler(handler) + >>> old_level = logging.getLogger().getEffectiveLevel() + >>> logging.getLogger().setLevel(logging.WARNING) + >>> cache.invalidate(p64(1), p64(1)) + Ignoring invalidation with same tid as current + >>> cache.close() + >>> cache = ZEO.cache.ClientCache('cache', 1000) + >>> cache.close() + >>> logging.getLogger().removeHandler(handler) + >>> logging.getLogger().setLevel(old_level) + """ + +def rename_bad_cache_file(): + """ +An attempt to open a bad cache file will cause it to be dropped and recreated. + + >>> open('cache', 'w').write('x'*100) + >>> import logging, sys + >>> handler = logging.StreamHandler(sys.stdout) + >>> logging.getLogger().addHandler(handler) + >>> old_level = logging.getLogger().getEffectiveLevel() + >>> logging.getLogger().setLevel(logging.WARNING) + + >>> cache = ZEO.cache.ClientCache('cache', 1000) # doctest: +ELLIPSIS + Moving bad cache file to 'cache.bad'. + Traceback (most recent call last): + ... + ValueError: unexpected magic number: 'xxxx' + + >>> cache.store(p64(1), p64(1), None, 'data') + >>> cache.close() + >>> f = open('cache') + >>> f.seek(0, 2) + >>> f.tell() + 1000 + >>> f.close() + + >>> open('cache', 'w').write('x'*200) + >>> cache = ZEO.cache.ClientCache('cache', 1000) # doctest: +ELLIPSIS + Removing bad cache file: 'cache' (prev bad exists). + Traceback (most recent call last): + ... + ValueError: unexpected magic number: 'xxxx' + + >>> cache.store(p64(1), p64(1), None, 'data') + >>> cache.close() + >>> f = open('cache') + >>> f.seek(0, 2) + >>> f.tell() + 1000 + >>> f.close() + + >>> f = open('cache.bad') + >>> f.seek(0, 2) + >>> f.tell() + 100 + >>> f.close() + + >>> logging.getLogger().removeHandler(handler) + >>> logging.getLogger().setLevel(old_level) + """ + def test_suite(): suite = unittest.TestSuite() suite.addTest(unittest.makeSuite(CacheTests)) -- 2.30.9