Commit 67d48555 authored by Tim Peters

Merge rev 37422 from 3.4 branch.

Massive rewrite to make simul.py's circular cache aware of
MVCC.  It isn't blowing up, but it thinks the hit rate is
100% -- might have missed something there <wink>.
parent f46415f5
@@ -205,17 +205,20 @@ class ClientCache(object):
if L is None:
self._trace(0x24, oid, tid)
return None
-# A pair with None as the second element will always be less
-# than any pair with the same first tid.
+# A pair with None as the second element is less than any pair with
+# the same first tid.  Dubious: this relies on the fact that None is
+# less than any comparable non-None object in recent Pythons.
i = bisect.bisect_left(L, (tid, None))
-# The least element left of tid was written before tid.  If
-# there is no element, the cache doesn't have old enough data.
+# Now L[i-1] < (tid, None) < L[i], and the start_tid for everything in
+# L[:i] is < tid, and the start_tid for everything in L[i:] is >= tid.
+# Therefore the largest start_tid < tid must be at L[i-1].  If i is 0,
+# there is no start_tid < tid: we don't have any data old enough.
if i == 0:
self._trace(0x24, oid, tid)
return
lo, hi = L[i-1]
-# lo should always be less than tid
-if not lo < tid <= hi:
+assert lo < tid
+if tid > hi:  # we don't have any data in the right range
self._trace(0x24, oid, tid)
return None
o = self.fc.access((oid, lo))
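
For readers skimming the diff, here is a small standalone illustration (an editorial sketch, not part of the commit) of the lookup the new comments describe: bisect_left over a sorted list of (start_tid, end_tid) pairs finds the one range, if any, satisfying lo < tid <= hi. Integer tids stand in for the 8-byte transaction ids, and find_noncurrent is a made-up helper name.

import bisect

def find_noncurrent(L, tid):
    # L is a sorted list of (start_tid, end_tid) pairs, kept sorted with
    # bisect.insort_left just as ClientCache keeps its noncurrent lists.
    i = bisect.bisect_left(L, (tid, None))
    if i == 0:
        return None               # no revision started before tid
    lo, hi = L[i-1]               # the largest start_tid < tid
    assert lo < tid
    if tid > hi:
        return None               # tid falls in a gap we don't have
    return lo, hi

L = []
for pair in [(5, 8), (8, 12), (15, 20)]:
    bisect.insort_left(L, pair)
print find_noncurrent(L, 10)      # -> (8, 12), since 8 < 10 <= 12
print find_noncurrent(L, 13)      # -> None, no data valid at tid 13
print find_noncurrent(L, 3)       # -> None, nothing old enough
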
@@ -284,7 +287,7 @@ class ClientCache(object):
p = start_tid, end_tid
if p in L:
return # duplicate store
-bisect.insort_left(L, (start_tid, end_tid))
+bisect.insort_left(L, p)
self._trace(0x54, oid, version, start_tid, end_tid,
dlen=len(data))
self.fc.add(o)
@@ -457,7 +460,7 @@ class ClientCache(object):
self._trace = notrace
def _trace(self,
-code, oid="", version="", tid="", end_tid=z64, dlen=0,
+code, oid="", version="", tid=z64, end_tid=z64, dlen=0,
# The next two are just speed hacks.
time_time=time.time, struct_pack=struct.pack):
# The code argument is two hex digits; bits 0 and 7 must be zero.
...
@@ -34,12 +34,14 @@
import time
import getopt
import struct
import math
+import bisect
from sets import Set
+from ZODB.utils import z64
def usage(msg):
-print >>sys.stderr, msg
-print >>sys.stderr, __doc__
+print >> sys.stderr, msg
+print >> sys.stderr, __doc__
def main():
# Parse options.
@@ -143,7 +145,7 @@ def main():
code & 0x7e,
code & 0x01)
# And pass it to the simulation.
-sim.event(ts, dlen, version, code, current, oid, start_tid)
+sim.event(ts, dlen, version, code, current, oid, start_tid, end_tid)
f.close()
# Finish simulation.
@@ -185,7 +187,8 @@ class Simulation(object):
self.writes = 0
self.ts0 = None
-def event(self, ts, dlen, _version, code, _current, oid, _serial):
+def event(self, ts, dlen, _version, code, _current, oid,
+          start_tid, end_tid):
# Record first and last timestamp seen.
if self.ts0 is None:
self.ts0 = ts
@@ -203,15 +206,14 @@ class Simulation(object):
self.loads += 1
self.total_loads += 1
assert (dlen == 0) == (code in (0x20, 0x24))
-if dlen:
-self.load(oid, dlen)
+self.load(oid, dlen, start_tid)
elif action == 0x50:
# Store.
assert dlen
-self.write(oid, dlen)
+self.write(oid, dlen, start_tid, end_tid)
elif action == 0x10:
# Invalidate.
-self.inval(oid)
+self.inval(oid, start_tid)
elif action == 0x00:
# Restart.
self.report()
@@ -219,14 +221,14 @@ class Simulation(object):
else:
raise ValueError("unknown trace code 0x%x" % code)
-def write(self, oid, size):
+def write(self, oid, size, start_tid, end_tid):
pass
-def load(self, oid, size):
+def load(self, oid, size, start_tid):
# Must increment .hits and .total_hits as appropriate.
pass
-def inval(self, oid):
+def inval(self, oid, start_tid):
# Must increment .invals and .total_invals as appropriate.
pass
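
As an aside (not part of this commit), the widened hook signatures above can be exercised with a toy dispatcher. Everything below is an editorial sketch: ToySimulation is a made-up name, integer oids/tids stand in for the real 8-byte values, and the `action = code & 0x70` mask plus the 0x20/0x50/0x10 groupings are assumptions inferred from the dispatch shown above.

class ToySimulation(object):
    # Minimal stand-in for Simulation: it only counts events and forwards
    # the new MVCC arguments (start_tid, end_tid) to the policy hooks.
    def __init__(self):
        self.loads = self.writes = self.invals = 0

    def event(self, ts, dlen, version, code, current, oid,
              start_tid, end_tid):
        action = code & 0x70          # assumed mask selecting the event kind
        if action == 0x20:            # load
            self.loads += 1
            self.load(oid, dlen, start_tid)
        elif action == 0x50:          # store
            self.writes += 1
            self.write(oid, dlen, start_tid, end_tid)
        elif action == 0x10:          # invalidate
            self.invals += 1
            self.inval(oid, start_tid)

    # Policy hooks with the same shapes as the rewritten methods above.
    def load(self, oid, size, start_tid):
        pass

    def write(self, oid, size, start_tid, end_tid):
        pass

    def inval(self, oid, start_tid):
        pass

sim = ToySimulation()
sim.event(0, 128, '', 0x52, 0, 1, 10, 0)    # store oid 1 at tid 10
sim.event(1, 128, '', 0x22, 0, 1, 10, 0)    # load oid 1
print sim.loads, sim.writes, sim.invals     # -> 1 1 0
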
@@ -280,6 +282,255 @@ class Simulation(object):
for name in self.extras])
print (self.format + " OVERALL") % args
from ZEO.cache import ZEC3_HEADER_SIZE
# An Entry just wraps a (key, offset) pair. A key is in turn an
# (oid, tid) pair.
from ZEO.cache import Entry
class CircularCacheSimulation(Simulation):
# The cache is managed as a single file with a pointer that
# goes around the file, circularly, forever. New objects
# are written at the current pointer, evicting whatever was
# there previously.
extras = "evicts", "inuse"
def __init__(self, cachelimit):
from ZEO import cache
from BTrees.OIBTree import OIBTree
Simulation.__init__(self, cachelimit)
self.total_evicts = 0
# Current offset in file.
self.offset = ZEC3_HEADER_SIZE
# Map offset in file to (size, Entry) pair, or to (size, None) if
# the offset starts a free block.
self.filemap = {ZEC3_HEADER_SIZE: (self.cachelimit - ZEC3_HEADER_SIZE,
None)}
# Map key to Entry. A key is an (oid, tid) pair.
self.key2entry = {}
# Map oid to tid of current revision.
self.current = {}
# Map oid to list of (start_tid, end_tid) pairs in sorted order.
# Used to find matching key for load of non-current data.
self.noncurrent = {}
# Map key (an (oid, tid) pair) to the size of the object state.
# Unlike the others, this accumulates knowledge over time, regardless
# of what's in the cache. The problem: the trace file may have
# a load hit where we have a load miss. There won't be a store in
# the trace file "soon" since the actual cache had the data. What
# can the simulated cache do? If the object has ever been seen
# before, it can look up its size in this dict, and "pretend" to
# do a store. This isn't faithful in all cases, since we don't
# know the right tid: if we miss on a plain load(), the trace
# file contains no indication of the appropriate tid.
self.key2size = OIBTree()
# The number of overhead bytes needed to store an object pickle
# on disk (all bytes beyond those needed for the object pickle).
self.overhead = (cache.Object.TOTAL_FIXED_SIZE +
cache.OBJECT_HEADER_SIZE)
def restart(self):
Simulation.restart(self)
self.evicts = 0
def load(self, oid, size, tid):
if tid == z64:
# Trying to load current revision.
if oid in self.current:
self.hits += 1
self.total_hits += 1
else:
self._cache_miss(oid, tid)
return
# May or may not be trying to load the current revision.
cur_tid = self.current.get(oid)
if cur_tid == tid:
self.hits += 1
self.total_hits += 1
return
# It's a load for non-current data. Do we know about this oid?
L = self.noncurrent.get(oid)
if L is None:
self._cache_miss(oid, tid)
return
i = bisect.bisect_left(L, (tid, None))
if i == 0:
# This tid is smaller than any we know about -- miss.
self._cache_miss(oid, tid)
return
lo, hi = L[i-1]
assert lo < tid
if tid > hi:
self._cache_miss(oid, tid)
return
# Cache hit.
self.hits += 1
self.total_hits += 1
def _cache_miss(self, oid, tid, HUGE64='\xff'*8):
have_data = False
if tid == z64:
# Miss on current data. Find the most recent revision we ever saw.
items = self.key2size.items(min=(oid, z64), max=(oid, HUGE64))
if items:
(oid, tid), size = items[-1] # most recent
have_data = True
else:
# Miss on non-current data. Find one "that fits", approximately.
items = self.key2size.items(min=(oid, tid), max=(oid, HUGE64))
if items:
(oid, tid), size = items[0] # first one at or after tid
have_data = True
if have_data:
# Pretend the cache miss was followed by a store.
self.writes += 1
self.total_writes += 1
self.add(oid, tid, size)
# (oid, tid) is in the cache. Remove it: take it out of key2entry,
# and in `filemap` mark the space it occupied as being free.
def _remove(self, oid, tid):
key = oid, tid
e = self.key2entry.pop(key)
pos = e.offset
size, _e = self.filemap[pos]
assert e is _e
self.filemap[pos] = size, None
def _remove_noncurrent_revisions(self, oid):
noncurrent_list = self.noncurrent.get(oid)
if noncurrent_list:
self.invals += len(noncurrent_list)
self.total_invals += len(noncurrent_list)
for start_tid, end_tid in noncurrent_list:
self._remove(oid, start_tid)
del self.noncurrent[oid]
def inval(self, oid, tid):
if tid == z64:
# This is part of startup cache verification.
self._remove_noncurrent_revisions(oid)
cur_tid = self.current.get(oid)
if cur_tid is None:
# We don't have current data, so nothing to do.
return
# We had current data for oid, but no longer.
self.invals += 1
self.total_invals += 1
del self.current[oid]
if tid == z64:
# Startup cache verification: forget this oid entirely.
self._remove(oid, cur_tid)
return
# Add the validity range to the list of non-current data for oid.
assert cur_tid < tid
L = self.noncurrent.setdefault(oid, [])
bisect.insort_left(L, (cur_tid, tid))
def write(self, oid, size, start_tid, end_tid):
if end_tid == z64:
# Storing current revision.
if oid in self.current: # we already have it in cache
return
self.current[oid] = start_tid
self.key2size[oid, start_tid] = size
self.writes += 1
self.total_writes += 1
self.add(oid, start_tid, size)
return
# Storing non-current revision.
L = self.noncurrent.setdefault(oid, [])
p = start_tid, end_tid
if p in L:
return # we already have it in cache
bisect.insort_left(L, p)
self.key2size[(oid, start_tid)] = size
self.writes += 1
self.total_writes += 1
self.add(oid, start_tid, size)
def add(self, oid, tid, size):
size += self.overhead
avail = self.makeroom(size)
key = oid, tid
assert key not in self.key2entry
e = Entry(key, self.offset)
self.filemap[self.offset] = size, e
self.key2entry[key] = e
self.offset += size
# All the space made available must be accounted for in filemap.
excess = avail - size
if excess:
self.filemap[self.offset] = excess, None
def makeroom(self, need):
# Evict enough objects to make the necessary space available.
if self.offset + need > self.cachelimit:
self.offset = ZEC3_HEADER_SIZE
pos = self.offset
while need > 0:
assert pos < self.cachelimit
try:
size, e = self.filemap[pos]
except KeyError:
self.dump()
raise
del self.filemap[pos]
if e:
self.evicts += 1
self.total_evicts += 1
assert pos == e.offset
_e = self.key2entry.pop(e.key)
assert e is _e
need -= size
pos += size
return pos - self.offset # total number of bytes freed
def report(self):
self.check()
free = used = total = 0
for size, e in self.filemap.itervalues():
total += size
if e:
used += size
else:
free += size
self.inuse = round(100.0 * used / total, 1)
self.total_inuse = self.inuse
Simulation.report(self)
def check(self):
oidcount = 0
pos = ZEC3_HEADER_SIZE
while pos < self.cachelimit:
size, e = self.filemap[pos]
if e:
oidcount += 1
assert self.key2entry[e.key].offset == pos
pos += size
assert oidcount == len(self.key2entry)
assert pos == self.cachelimit
def dump(self):
print len(self.filemap)
L = list(self.filemap)
L.sort()
for k in L:
v = self.filemap[k]
print k, v[0], repr(v[1])
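
To make the circular bookkeeping above easier to follow in isolation, here is a stripped-down editorial sketch (not part of the commit; ToyCircularFile and HEADER are made-up names) of the same add()/makeroom() idea: a write pointer advances through a fixed-size file, wraps at the limit, and evicts every block it overwrites, with filemap-style offset accounting using bare keys instead of Entry objects.

HEADER = 12          # stand-in for ZEC3_HEADER_SIZE

class ToyCircularFile(object):
    def __init__(self, limit):
        self.limit = limit
        self.offset = HEADER
        # offset -> (size, key or None); None marks a free block.
        self.filemap = {HEADER: (limit - HEADER, None)}
        self.key2ofs = {}

    def makeroom(self, need):
        # Wrap the write pointer if the object wouldn't fit before the end.
        if self.offset + need > self.limit:
            self.offset = HEADER
        pos = self.offset
        while need > 0:
            size, key = self.filemap.pop(pos)
            if key is not None:          # evict a live object
                del self.key2ofs[key]
            need -= size
            pos += size
        return pos - self.offset         # total bytes made available

    def add(self, key, size):
        avail = self.makeroom(size)
        self.filemap[self.offset] = (size, key)
        self.key2ofs[key] = self.offset
        self.offset += size
        excess = avail - size
        if excess:                       # leftover space stays a free block
            self.filemap[self.offset] = (excess, None)

f = ToyCircularFile(limit=100)
for i in range(6):
    f.add(i, 30)                         # 30-byte objects force wraparound
print sorted(f.key2ofs)                  # -> [4, 5]: only the newest survive
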
class ZEOCacheSimulation(Simulation):
"""Simulate the ZEO 1.0 and 2.0 cache behavior.
@@ -971,131 +1222,6 @@ class OracleSimulation(LRUCacheSimulation):
print "Scanned file, %d unique oids, %d repeats" % (
all, len(self.count))
from ZEO.cache import ZEC3_HEADER_SIZE
class CircularCacheSimulation(Simulation):
# The cache is managed as a single file with a pointer that
# goes around the file, circularly, forever. New objects
# are written at the current pointer, evicting whatever was
# there previously.
extras = "evicts", "inuse"
def __init__(self, cachelimit):
from ZEO import cache
Simulation.__init__(self, cachelimit)
self.total_evicts = 0
# Current offset in file.
self.offset = ZEC3_HEADER_SIZE
# Map offset in file to (size, oid) pair.
self.filemap = {ZEC3_HEADER_SIZE: (self.cachelimit - ZEC3_HEADER_SIZE,
None)}
# Map oid to file offset.
self.oid2ofs = {}
self.overhead = (cache.Object.TOTAL_FIXED_SIZE +
cache.OBJECT_HEADER_SIZE)
def restart(self):
Simulation.restart(self)
self.evicts = 0
def load(self, oid, size):
if oid in self.oid2ofs:
self.hits += 1
self.total_hits += 1
elif size:
self.writes += 1
self.total_writes += 1
self.add(oid, size)
# Else it was a load miss in the trace file, and a load miss here too.
def inval(self, oid):
pos = self.oid2ofs.pop(oid, None)
if pos is None:
return
self.invals += 1
self.total_invals += 1
size, _oid = self.filemap[pos]
assert oid == _oid
self.filemap[pos] = size, None
def write(self, oid, size):
if oid not in self.oid2ofs:
self.writes += 1
self.total_writes += 1
self.add(oid, size)
def add(self, oid, size):
size += self.overhead
avail = self.makeroom(size)
assert oid not in self.oid2ofs
self.filemap[self.offset] = size, oid
self.oid2ofs[oid] = self.offset
self.offset += size
# All the space made available must be accounted for in filemap.
excess = avail - size
if excess:
self.filemap[self.offset] = excess, None
def makeroom(self, need):
# Evict enough objects to make the necessary space available.
if self.offset + need > self.cachelimit:
self.offset = ZEC3_HEADER_SIZE
pos = self.offset
while need > 0:
assert pos < self.cachelimit
try:
size, oid = self.filemap[pos]
except KeyError:
self.dump()
raise
del self.filemap[pos]
if oid:
self.evicts += 1
self.total_evicts += 1
_pos = self.oid2ofs.pop(oid)
assert pos == _pos
need -= size
pos += size
return pos - self.offset # total number of bytes freed
def report(self):
self.check()
free = used = total = 0
for size, oid in self.filemap.itervalues():
total += size
if oid:
used += size
else:
free += size
self.inuse = round(100.0 * used / total, 1)
self.total_inuse = self.inuse
Simulation.report(self)
def check(self):
oidcount = 0
pos = ZEC3_HEADER_SIZE
while pos < self.cachelimit:
size, oid = self.filemap[pos]
if oid:
oidcount += 1
assert self.oid2ofs[oid] == pos
pos += size
assert oidcount == len(self.oid2ofs)
assert pos == self.cachelimit
def dump(self):
print len(self.filemap)
L = list(self.filemap)
L.sort()
for k in L:
v = self.filemap[k]
print k, v[0], repr(v[1])
class BuddyCacheSimulation(LRUCacheSimulation):
def __init__(self, cachelimit):
...