Commit f1e30c0f authored by Tim Peters

Merge rev 28660 from 3.3 branch.

More code and comment cleanups.  Notable changes:

FileCache:  removed the currentsize attribute.  It was initialized
to 0 but never updated.  Some tests referenced it, but since it was
always 0 those tests weren't getting any benefit from it.  Don't
recall (or never knew) what its intended purpose may have been.

FileCache.remove():  Repaired major bug.  This mistakenly stored
the "this disk block is free now" status byte into the start of
the serialized Object record's end_tid field (incorrect seek
offset).  The in-memory structures were correct then, but got out
of synch with the disk file; the latter then still claimed to have
info for a "live" object revision, but that revision was actually
dead, and the info on disk had a corrupt end_tid value.
parent 18dd890c
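To make the seek-offset mistake concrete, here is a minimal sketch (not the actual ZEO/cache.py code; the helper name is invented) of what remove() effectively did before and after the fix, given the on-disk block layout of a 1-byte status, a 4-byte size, and 16 bytes of oid+tid ahead of the serialized Object:

    # Illustrative sketch only -- mirrors the layout described in
    # ZEO/cache.py, not the module's actual helpers.
    OBJECT_HEADER_SIZE = 1 + 4 + 16   # status byte + >I size + oid + tid

    def mark_block_free(f, offset):
        # Buggy version: seeking past the block header lands on the start
        # of the serialized Object record (its end_tid field), so the 'f'
        # byte corrupted object data while the block still read as 'a'
        # (allocated) on disk:
        #     f.seek(offset + OBJECT_HEADER_SIZE)   # wrong offset
        # Fixed version: overwrite the allocation status byte itself; the
        # 4-byte size field that follows is unchanged, so the block becomes
        # a valid free block of the same size.
        f.seek(offset)
        f.write('f')
        f.flush()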
@@ -55,6 +55,12 @@ What's new in ZODB3 3.3.1?
 ==========================
 Release date: xx-xxx-2004
 
+ZEO client cache
+----------------
+
+- Fixed a bug wherein an object removed from the client cache didn't
+  properly mark the file slice it occupied as being available for reuse.
+
 persistent
 ----------
...
@@ -556,7 +556,7 @@ class Entry(object):
 ##
 # FileCache stores a cache in a single on-disk file.
 #
-# On-disk cache structure
+# On-disk cache structure.
 #
 # The file begins with a 12-byte header. The first four bytes are the
 # file's magic number - ZEC3 - indicating zeo cache version 3. The
@@ -583,13 +583,13 @@ ZEC3_HEADER_SIZE = 12
 # empty (size 0) blocks.
-# XXX This needs a lot more hair.
-# The structure of an allocated block is more complicated:
+# Allocated blocks have more structure:
 #
 # 1 byte allocation status ('a').
 # 4 bytes block size, >I format.
 # 16 bytes oid + tid, string.
-# size-OBJECT_HEADER_SIZE bytes, the object pickle.
+# size-OBJECT_HEADER_SIZE bytes, the serialization of an Object (see
+# class Object for details).
 
 OBJECT_HEADER_SIZE = 1 + 4 + 16
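As a rough illustration of the 21-byte allocated-block header those comments describe (the real reading and writing is done by Object.fromFile and FileCache._writeobj; these helper names are invented):

    import struct

    OBJECT_HEADER_SIZE = 1 + 4 + 16

    def pack_block_header(size, oid, tid):
        # 'a' status byte, big-endian 4-byte total block size, then the
        # 8-byte oid and 8-byte tid.
        assert len(oid) == 8 and len(tid) == 8
        return 'a' + struct.pack(">I", size) + oid + tid

    def unpack_block_header(header):
        assert len(header) == OBJECT_HEADER_SIZE
        status = header[0]
        size, = struct.unpack(">I", header[1:5])
        oid, tid = header[5:13], header[13:21]
        return status, size, oid, tid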
@@ -613,11 +613,19 @@ def sync(f):
 
 class FileCache(object):
 
     def __init__(self, maxsize, fpath, parent, reuse=True):
-        # Maximum total of object sizes we keep in cache.
+        # - `maxsize`: total size of the cache file, in bytes
+        # - `fpath`: filepath for the cache file, or None; see `reuse`
+        # - `parent`: the ClientCache this FileCache is part of
+        # - `reuse`: If true, and fpath is not None, and fpath names a
+        #   file that exists, that pre-existing file is used (persistent
+        #   cache). In all other cases a new file is created: a temp
+        #   file if fpath is None, else with path fpath.
+        assert maxsize >= 1000  # although 1000 is still absurdly low
         self.maxsize = maxsize
-        # Current total of object sizes in cache.
-        self.currentsize = 0
         self.parent = parent
+
+        # tid for the most recent transaction we know about. This is also
+        # stored near the start of the file.
         self.tid = None
 
         # Map offset in file to pair (data record size, Entry).
@@ -625,10 +633,12 @@ class FileCache(object):
         # filemap always contains a complete account of what's in the
         # file -- study method _verify_filemap for executable checking
         # of the relevant invariants. An offset is at the start of a
-        # block iff it's a key in filemap.
+        # block iff it's a key in filemap. The data record size is
+        # stored in the file too, so we could just seek to the offset
+        # and read it up; keeping it in memory too is an optimization.
         self.filemap = {}
 
-        # Map key to Entry. There's one entry for each object in the
+        # Map key to Entry. There's one Entry for each object in the
         # cache file. After
         #     obj = key2entry[key]
         # then
@@ -641,6 +651,12 @@ class FileCache(object):
         # currentofs.
         self.currentofs = ZEC3_HEADER_SIZE
 
+        # self.new is false iff we're reusing an existing file.
+        # self.f is the open file object.
+        # When we're not reusing an existing file, self.f is left None
+        # here -- the scan() method must be called then to open the file
+        # (and it sets self.f).
         self.fpath = fpath
         if not reuse or not fpath or not os.path.exists(fpath):
             self.new = True
@@ -663,17 +679,19 @@ class FileCache(object):
             self.filemap[ZEC3_HEADER_SIZE] = (self.maxsize - ZEC3_HEADER_SIZE,
                                                None)
         else:
+            # Reuse an existing file. scan() will open & read it.
             self.new = False
+            assert fpath
             self.f = None
 
         # Statistics: _n_adds, _n_added_bytes,
         #             _n_evicts, _n_evicted_bytes
         self.clearStats()
 
+    ##
     # Scan the current contents of the cache file, calling install
     # for each object found in the cache. This method should only
     # be called once to initialize the cache from disk.
     def scan(self, install):
         if self.new:
             return
@@ -683,6 +701,8 @@ class FileCache(object):
         if _magic != magic:
             raise ValueError("unexpected magic number: %r" % _magic)
         self.tid = self.f.read(8)
+        if len(self.tid) != 8:
+            raise ValueError("cache file too small -- no tid at start")
         # Remember the largest free block. That seems a
         # decent place to start currentofs.
         max_free_size = max_free_offset = 0
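The two new lines give scan() a sanity check on the 12-byte file header (the 4-byte magic 'ZEC3' followed by the 8-byte most-recent tid). A standalone sketch of that header check, with an invented helper name:

    magic = "ZEC3"
    ZEC3_HEADER_SIZE = 12   # len(magic) + 8-byte tid

    def read_cache_header(f):
        # Sketch only; the real check lives inline in FileCache.scan().
        f.seek(0)
        _magic = f.read(4)
        if _magic != magic:
            raise ValueError("unexpected magic number: %r" % _magic)
        tid = f.read(8)
        if len(tid) != 8:
            raise ValueError("cache file too small -- no tid at start")
        return tid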
@@ -702,7 +722,8 @@ class FileCache(object):
             elif status in '1234':
                 size = int(status)
             else:
-                assert 0, hex(ord(status))
+                raise ValueError("unknown status byte value %s in client "
+                                 "cache file" % 0, hex(ord(status)))
             self.filemap[ofs] = size, ent
             if ent is None and size > max_free_size:
@@ -710,7 +731,9 @@ class FileCache(object):
             ofs += size
 
-        assert ofs == fsize
+        if ofs != fsize:
+            raise ValueError("final offset %s != file size %s in client "
+                             "cache file" % (ofs, fsize))
         if __debug__:
             self._verify_filemap()
         self.currentofs = max_free_offset
@@ -728,35 +751,47 @@ class FileCache(object):
                 self._n_accesses
                 )
 
+    ##
+    # The number of objects currently in the cache.
     def __len__(self):
         return len(self.key2entry)
 
+    ##
+    # Iterate over the objects in the cache, producing an Entry for each.
     def __iter__(self):
         return self.key2entry.itervalues()
 
+    ##
+    # Test whether an (oid, tid) pair is in the cache.
     def __contains__(self, key):
         return key in self.key2entry
 
+    ##
+    # Do all possible to ensure all bytes written to the file so far are
+    # actually on disk.
     def sync(self):
         sync(self.f)
 
+    ##
+    # Close the underlying file. No methods accessing the cache should be
+    # used after this.
     def close(self):
         if self.f:
             self.sync()
             self.f.close()
             self.f = None
 
+    ##
     # Evict objects as necessary to free up at least nbytes bytes,
     # starting at currentofs. If currentofs is closer than nbytes to
-    # the end of the file, currentofs is reset to 0. The number of
-    # bytes actually freed may be (and probably will be) greater than
-    # nbytes, and is _makeroom's return value. The file is not
+    # the end of the file, currentofs is reset to ZEC3_HEADER_SIZE first.
+    # The number of bytes actually freed may be (and probably will be)
+    # greater than nbytes, and is _makeroom's return value. The file is not
     # altered by _makeroom. filemap is updated to reflect the
     # evictions, and it's the caller's responsibilty both to fiddle
     # the file, and to update filemap, to account for all the space
     # freed (starting at currentofs when _makeroom returns, and
     # spanning the number of bytes retured by _makeroom).
     def _makeroom(self, nbytes):
         assert 0 < nbytes <= self.maxsize
         if self.currentofs + nbytes > self.maxsize:
@@ -770,11 +805,11 @@ class FileCache(object):
             nbytes -= size
         return ofs - self.currentofs
 
+    ##
     # Write Object obj, with data, to file starting at currentofs.
     # nfreebytes are already available for overwriting, and it's
     # guranteed that's enough. obj.offset is changed to reflect the
     # new data record position, and filemap is updated to match.
     def _writeobj(self, obj, nfreebytes):
         size = OBJECT_HEADER_SIZE + obj.size
         assert size <= nfreebytes
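The _makeroom comment above describes a simple sweep in file order: reclaim whole blocks starting at currentofs until at least nbytes are covered, wrapping back to just past the header when the request would run off the end of the file. A rough functional sketch of that loop, assuming filemap maps offset -> (block size, Entry or None) as documented in __init__ (evict stands in for _evictobj plus the parent notification):

    ZEC3_HEADER_SIZE = 12

    def makeroom(filemap, currentofs, maxsize, nbytes, evict):
        # Sketch only; the real _makeroom mutates self.currentofs and leaves
        # rewriting the file itself to its caller.
        assert 0 < nbytes <= maxsize
        if currentofs + nbytes > maxsize:
            currentofs = ZEC3_HEADER_SIZE       # wrap, skipping the header
        ofs = currentofs
        while nbytes > 0:
            size, entry = filemap.pop(ofs)      # consume the block at ofs
            if entry is not None:
                evict(entry, size)              # allocated block: evict it
            ofs += size
            nbytes -= size
        # The freed span starts at currentofs and covers ofs - currentofs bytes.
        return currentofs, ofs - currentofs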
@@ -806,12 +841,20 @@ class FileCache(object):
         # written.
         self.filemap[self.currentofs] = excess, None
 
+    ##
+    # Add Object object to the cache. This may evict existing objects, to
+    # make room (and almost certainly will, in steady state once the cache
+    # is first full). The object must not already be in the cache.
     def add(self, object):
         size = OBJECT_HEADER_SIZE + object.size
-        if size > self.maxsize:
+        # A number of cache simulation experiments all concluded that the
+        # 2nd-level ZEO cache got a much higher hit rate if "very large"
+        # objects simply weren't cached. For now, we ignore the request
+        # only if the entire cache file is too small to hold the object.
+        if size > self.maxsize - ZEC3_HEADER_SIZE:
             return
         assert size <= self.maxsize
         assert object.key not in self.key2entry
         assert len(object.key[0]) == 8
         assert len(object.key[1]) == 8
@@ -822,31 +865,12 @@ class FileCache(object):
         available = self._makeroom(size)
         self._writeobj(object, available)
 
-    def _verify_filemap(self, display=False):
-        a = ZEC3_HEADER_SIZE
-        f = self.f
-        while a < self.maxsize:
-            f.seek(a)
-            status = f.read(1)
-            if status in 'af':
-                size, = struct.unpack(">I", f.read(4))
-            else:
-                size = int(status)
-            if display:
-                if a == self.currentofs:
-                    print '*****',
-                print "%c%d" % (status, size),
-            size2, obj = self.filemap[a]
-            assert size == size2
-            assert (obj is not None) == (status == 'a')
-            if obj is not None:
-                assert obj.offset == a
-                assert self.key2entry[obj.key] is obj
-            a += size
-        if display:
-            print
-        assert a == self.maxsize
+    ##
+    # Evict the object represented by Entry `e` from the cache, freeing
+    # `size` bytes in the file for reuse. `size` is used only for summary
+    # statistics. This does not alter the file, or self.filemap or
+    # self.key2entry (those are the caller's responsibilities). It does
+    # invoke _evicted(Object) on our parent.
     def _evictobj(self, e, size):
         self._n_evicts += 1
         self._n_evicted_bytes += size
@@ -857,8 +881,7 @@ class FileCache(object):
         self.parent._evicted(o)
 
     ##
-    # Return object for key or None if not in cache.
+    # Return Object for key, or None if not in cache.
     def access(self, key):
         self._n_accesses += 1
         e = self.key2entry.get(key)
@@ -872,8 +895,7 @@ class FileCache(object):
         return Object.fromFile(self.f, key)
 
     ##
-    # Remove object for key from cache, if present.
+    # Remove Object for key from cache, if present.
     def remove(self, key):
         # If an object is being explicitly removed, we need to load
         # its header into memory and write a free block marker to the
@@ -888,24 +910,33 @@ class FileCache(object):
             return
         offset = e.offset
         size, e2 = self.filemap[offset]
+        assert size >= 5  # only free blocks are tiny
         self.f.seek(offset + OBJECT_HEADER_SIZE)
         o = Object.fromFile(self.f, key, header_only=True)
-        self.f.seek(offset + OBJECT_HEADER_SIZE)
+        # Because `size` >= 5, we can change an allocated block to a free
+        # block just by overwriting the 'a' status byte with 'f' -- the
+        # size field stays the same.
+        self.f.seek(offset)
         self.f.write('f')
         self.f.flush()
         self.parent._evicted(o)
         self.filemap[offset] = size, None
 
     ##
-    # Update on-disk representation of obj.
+    # Update on-disk representation of Object obj.
     #
     # This method should be called when the object header is modified.
+    # obj must be in the cache.
     def update(self, obj):
         e = self.key2entry[obj.key]
         self.f.seek(e.offset + OBJECT_HEADER_SIZE)
         obj.serialize_header(self.f)
 
+    ##
+    # Update our idea of the most recent tid. This is stored in the
+    # instance, and also written out near the start of the cache file. The
+    # new tid must be strictly greater than our current idea of the most
+    # recent tid.
     def settid(self, tid):
         if self.tid is not None and tid <= self.tid:
             raise ValueError("new last tid (%s) must be greater than "
@@ -913,6 +944,34 @@ class FileCache(object):
                                                     u64(self.tid)))
         assert isinstance(tid, str) and len(tid) == 8
         self.tid = tid
-        self.f.seek(4)
+        self.f.seek(len(magic))
         self.f.write(tid)
         self.f.flush()
+
+    ##
+    # This debug method marches over the entire cache file, verifying that
+    # the current contents match the info in self.filemap and self.key2entry.
+    def _verify_filemap(self, display=False):
+        a = ZEC3_HEADER_SIZE
+        f = self.f
+        while a < self.maxsize:
+            f.seek(a)
+            status = f.read(1)
+            if status in 'af':
+                size, = struct.unpack(">I", f.read(4))
+            else:
+                size = int(status)
+            if display:
+                if a == self.currentofs:
+                    print '*****',
+                print "%c%d" % (status, size),
+            size2, obj = self.filemap[a]
+            assert size == size2
+            assert (obj is not None) == (status == 'a')
+            if obj is not None:
+                assert obj.offset == a
+                assert self.key2entry[obj.key] is obj
+            a += size
+        if display:
+            print
+        assert a == self.maxsize
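The core invariant _verify_filemap asserts is that filemap tiles the file exactly: consecutive blocks abut, and the last one ends at maxsize. A compact illustration of just that tiling check, independent of the file contents (helper name invented):

    def check_filemap_tiling(filemap, maxsize, header_size=12):
        # Every block starts where the previous one ended; a KeyError here
        # means a gap or an overlap in the offset -> (size, entry) map.
        ofs = header_size
        while ofs < maxsize:
            size, _entry = filemap[ofs]
            assert size > 0
            ofs += size
        assert ofs == maxsize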
@@ -114,13 +114,11 @@ class CacheTests(unittest.TestCase):
             n = p64(i)
             self.cache.store(n, "", n, None, data[i])
             self.assertEquals(len(self.cache), i + 1)
-            self.assert_(self.cache.fc.currentsize < maxsize)
         # The cache now uses 1225 bytes. The next insert
         # should delete some objects.
         n = p64(50)
         self.cache.store(n, "", n, None, data[51])
         self.assert_(len(self.cache) < 51)
-        self.assert_(self.cache.fc.currentsize <= maxsize)
 
         # XXX Need to make sure eviction of non-current data
         # and of version data are handled correctly.
...