#! /usr/bin/env python
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Cache simulation.

Usage: simul.py [-b] [-l] [-s size] tracefile

-b: simulate buddy system cache (default: simulate ZEO 2.0 cache)
-l: simulate idealized LRU cache (default: simulate ZEO 2.0 cache)
-s size: cache size in MB (default 20 MB)
"""

import sys
import time
import getopt
import struct

def usage(msg):
    print >>sys.stderr, msg
    print >>sys.stderr, __doc__

def main():
    # Parse options
    cachelimit = 20*1000*1000
    simclass = ZEOCacheSimulation
    try:
        opts, args = getopt.getopt(sys.argv[1:], "bls:")
    except getopt.error, msg:
        usage(msg)
        return 2
    for o, a in opts:
        if o == '-b':
            simclass = BuddyCacheSimulation
        if o == '-l':
            simclass = LRUCacheSimulation
        if o == '-s':
            cachelimit = int(float(a) * 1e6)
    if len(args) != 1:
        usage("exactly one file argument required")
        return 2
    filename = args[0]

    # Open file
    if filename.endswith(".gz"):
        # Open gzipped file
        try:
            import gzip
        except ImportError:
            print >>sys.stderr,  "can't read gzipped files (no module gzip)"
            return 1
        try:
            f = gzip.open(filename, "rb")
        except IOError, msg:
            print >>sys.stderr,  "can't open %s: %s" % (filename, msg)
            return 1
    elif filename == "-":
        # Read from stdin
        f = sys.stdin
    else:
        # Open regular file
        try:
            f = open(filename, "rb")
        except IOError, msg:
            print >>sys.stderr,  "can't open %s: %s" % (filename, msg)
            return 1

    # Create simulation object
    sim = simclass(cachelimit)

    # Print output header
    sim.printheader()

    # Read trace file, simulating cache behavior
    while 1:
        # Read a record
        r = f.read(24)
        if len(r) < 24:
            break
        # Decode it
        ts, code, oid, serial = struct.unpack(">ii8s8s", r)
        dlen, version, code, current = (code & 0x7fffff00,
                                        code & 0x80,
                                        code & 0x7e,
                                        code & 0x01)
        # And pass it to the simulation
        sim.event(ts, dlen, version, code, current, oid, serial)

    # Finish simulation
    sim.finish()

    # Exit code from main()
    return 0

class Simulation:

    """Base class for simulations.

    The driver program calls: event(), printheader(), finish().

    The standard event() method calls these additional methods:
    write(), load(), inval(), report(), restart(); the standard
    finish() method also calls report().

    """

    def __init__(self, cachelimit):
        self.cachelimit = cachelimit
        # Initialize global statistics
        self.epoch = None
        self.total_loads = 0
        self.total_hits = 0 # Subclass must increment
        self.total_invals = 0
        self.total_writes = 0
        # Reset per-run statistics and set up simulation data
        self.restart()

    def restart(self):
        # Reset per-run statistics
        self.loads = 0
        self.hits = 0 # Subclass must increment
        self.invals = 0
        self.writes = 0
        self.ts0 = None

    def event(self, ts, dlen, _version, code, _current, oid, _serial):
        # Record first and last timestamp seen
        if self.ts0 is None:
            self.ts0 = ts
            if self.epoch is None:
                self.epoch = ts
        self.ts1 = ts

        # Simulate cache behavior.  Use load hits, updates and stores
        # only (each load miss is followed immediately by a store
        # unless the object in fact did not exist).  Updates always write.
        if dlen and code & 0x70 in (0x20, 0x30, 0x50):
            if code == 0x3A:
                # Update
                self.writes += 1
                self.total_writes += 1
                self.write(oid, dlen)
            else:
                # Load hit or store -- these are really the load requests
                self.loads += 1
                self.total_loads += 1
                self.load(oid, dlen)
        elif code & 0x70 == 0x10:
            # Invalidate
            self.inval(oid)
        elif code == 0x00:
            # Restart
            self.report()
            self.restart()

    def printheader(self):
        pass

    def write(self, oid, size):
        pass

    def load(self, oid, size):
        pass

    def inval(self, oid):
        pass

    def finish(self):
        self.report()

    def report(self):
        pass

class ZEOCacheSimulation(Simulation):

    """Simulate the current (ZEO 1.0 and 2.0) ZEO cache behavior.

    This assumes the cache is not persistent (we don't know how to
    simulate cache validation.)

    """

    def __init__(self, cachelimit):
        # Initialize base class
        Simulation.__init__(self, cachelimit)
        # Initialize additional global statistics
        self.total_flips = 0

    def restart(self):
        # Reset base class
        Simulation.restart(self)
        # Reset additional per-run statistics
        self.flips = 0
        # Set up simulation
        self.filesize = [4, 4] # account for magic number
        self.fileoids = [{}, {}]
        self.current = 0 # index into filesize, fileoids

    def load(self, oid, size):
        if (self.fileoids[self.current].get(oid) or
            self.fileoids[1 - self.current].get(oid)):
            self.hits += 1
            self.total_hits += 1
        else:
            self.write(oid, size)

    def write(self, oid, size):
        # Fudge because size is rounded up to multiples of 256.  (31
        # is header overhead per cache record; 127 is to compensate
        # for rounding up to multiples of 256.)
        size = size + 31 - 127
        if self.filesize[self.current] + size > self.cachelimit / 2:
            # Cache flip
            self.flips += 1
            self.total_flips += 1
            self.current = 1 - self.current
            self.filesize[self.current] = 4
            self.fileoids[self.current] = {}
        self.filesize[self.current] += size
        self.fileoids[self.current][oid] = 1

    def inval(self, oid):
        if self.fileoids[self.current].get(oid):
            self.invals += 1
            self.total_invals += 1
            del self.fileoids[self.current][oid]
        elif self.fileoids[1 - self.current].get(oid):
            self.invals += 1
            self.total_invals += 1
            del self.fileoids[1 - self.current][oid]

    format = "%12s %9s %8s %8s %6s %6s %6s %6s"

    def printheader(self):
        print self.format % (
            "START TIME", "DURATION", "LOADS", "HITS",
            "INVALS", "WRITES", "FLIPS", "HITRATE")

    def report(self):
        if self.loads:
            print self.format % (
                time.ctime(self.ts0)[4:-8],
                duration(self.ts1 - self.ts0),
                self.loads, self.hits, self.invals, self.writes, self.flips,
                hitrate(self.loads, self.hits))

    def finish(self):
        self.report()
        if self.total_loads:
            print (self.format + " OVERALL") % (
                time.ctime(self.epoch)[4:-8],
                duration(self.ts1 - self.epoch),
                self.total_loads,
                self.total_hits,
                self.total_invals,
                self.total_writes,
                self.total_flips,
                hitrate(self.total_loads, self.total_hits))

class LRUCacheSimulation(Simulation):

    def __init__(self, cachelimit):
        # Initialize base class
        Simulation.__init__(self, cachelimit)
        # Initialize additional global statistics
        self.total_evicts = 0

    def restart(self):
        # Reset base class
        Simulation.restart(self)
        # Reset additional per-run statistics
        self.evicts = 0
        # Set up simulation
        self.cache = {}
        self.size = 0
        self.head = Node(None, None)
        self.head.linkbefore(self.head)

    def load(self, oid, size):
        node = self.cache.get(oid)
        if node is not None:
            self.hits += 1
            self.total_hits += 1
            node.linkbefore(self.head)
        else:
            self.write(oid, size)

    def write(self, oid, size):
        node = self.cache.get(oid)
        if node is not None:
            node.unlink()
            assert self.head.next is not None
            self.size -= node.size
        node = Node(oid, size)
        self.cache[oid] = node
        node.linkbefore(self.head)
        self.size += size
        # Evict LRU nodes
        while self.size > self.cachelimit:
            self.evicts += 1
            self.total_evicts += 1
            node = self.head.next
            assert node is not self.head
            node.unlink()
            assert self.head.next is not None
            del self.cache[node.oid]
            self.size -= node.size

    def inval(self, oid):
        node = self.cache.get(oid)
        if node is not None:
            assert node.oid == oid
            self.invals += 1
            self.total_invals += 1
            node.unlink()
            assert self.head.next is not None
            del self.cache[oid]
            self.size -= node.size
            assert self.size >= 0

    format = "%12s %9s %8s %8s %6s %6s %6s %6s"

    def printheader(self):
        print self.format % (
            "START TIME", "DURATION", "LOADS", "HITS",
            "INVALS", "WRITES", "EVICTS", "HITRATE")

    def report(self):
        if self.loads:
            print self.format % (
                time.ctime(self.ts0)[4:-8],
                duration(self.ts1 - self.ts0),
                self.loads, self.hits, self.invals, self.writes, self.evicts,
                hitrate(self.loads, self.hits))

    def finish(self):
        self.report()
        if self.total_loads:
            print (self.format + " OVERALL") % (
                time.ctime(self.epoch)[4:-8],
                duration(self.ts1 - self.epoch),
                self.total_loads,
                self.total_hits,
                self.total_invals,
                self.total_writes,
                self.total_evicts,
                hitrate(self.total_loads, self.total_hits))

class Node:

    """Node in a doubly-linked list, storing oid and size as payload.

    A node can be linked or unlinked; in the latter case, next and
    prev are None.  Initially a node is unlinked.

    """
    # Make it a new-style class in Python 2.2 and up; no effect in 2.1
    __metaclass__ = type
    __slots__ = ['prev', 'next', 'oid', 'size']

    def __init__(self, oid, size):
        self.oid = oid
        self.size = size
        self.prev = self.next = None

    def unlink(self):
        prev = self.prev
        next = self.next
        if prev is not None:
            assert next is not None
            assert prev.next is self
            assert next.prev is self
            prev.next = next
            next.prev = prev
            self.prev = self.next = None
        else:
            assert next is None

    def linkbefore(self, next):
        self.unlink()
        prev = next.prev
        if prev is None:
            assert next.next is None
            prev = next
        self.prev = prev
        self.next = next
        prev.next = next.prev = self

class BuddyCacheSimulation(LRUCacheSimulation):

    def __init__(self, cachelimit):
        LRUCacheSimulation.__init__(self, roundup(cachelimit))

    def restart(self):
        LRUCacheSimulation.restart(self)
        self.allocator = BuddyAllocator(self.cachelimit)

    # LRUCacheSimulation.load() is just fine

    def write(self, oid, size):
        node = self.cache.get(oid)
        if node is not None:
            node.unlink()
            assert self.head.next is not None
            self.size -= node.size
            self.allocator.free(node)
        while 1:
            node = self.allocator.alloc(size)
            if node is not None:
                break
            # Failure to allocate.  Evict something and try again.
            node = self.head.next
            assert node is not self.head
            self.evicts += 1
            self.total_evicts += 1
            node.unlink()
            assert self.head.next is not None
            del self.cache[node.oid]
            self.size -= node.size
            self.allocator.free(node)
        node.oid = oid
        self.cache[oid] = node
        node.linkbefore(self.head)
        self.size += node.size

    def inval(self, oid):
        node = self.cache.get(oid)
        if node is not None:
            assert node.oid == oid
            self.invals += 1
            self.total_invals += 1
            node.unlink()
            assert self.head.next is not None
            del self.cache[oid]
            self.size -= node.size
            assert self.size >= 0
            self.allocator.free(node)

class BuddyNode(Node):

    __slots__ = ['addr']

    def __init__(self, oid, size, addr):
        Node.__init__(self, oid, size)
        self.addr = addr

MINSIZE = 256

def roundup(size):
    k = MINSIZE
    while k < size:
        k += k
    return k

class BuddyAllocator:

    def __init__(self, cachelimit):
        cachelimit = roundup(cachelimit)
        self.cachelimit = cachelimit
        self.avail = {} # Map rounded-up sizes to free list node heads
        self.nodes = {} # Map address to node
        k = MINSIZE
        while k <= cachelimit:
            self.avail[k] = n = Node(None, None) # Not BuddyNode; has no addr
            n.linkbefore(n)
            k += k
        node = BuddyNode(None, cachelimit, 0)
        self.nodes[0] = node
        node.linkbefore(self.avail[cachelimit])

    def alloc(self, size):
        size = roundup(size)
        k = size
        while k <= self.cachelimit:
            head = self.avail[k]
            node = head.next
            if node is not head:
                break
            k += k
        else:
            return None # Store is full, or block is too large
        node.unlink()
        size2 = node.size
        while size2 > size:
            size2 = size2 / 2
            assert size2 >= size
            node.size = size2
            buddy = BuddyNode(None, size2, node.addr + size2)
            self.nodes[buddy.addr] = buddy
            buddy.linkbefore(self.avail[size2])
        node.oid = 1 # Flag as in-use
        return node

    def free(self, node):
        assert node is self.nodes[node.addr]
        assert node.prev is node.next is None
        node.oid = None # Flag as free
        while node.size < self.cachelimit:
            buddy_addr = node.addr ^ node.size
            buddy = self.nodes[buddy_addr]
            assert buddy.addr == buddy_addr
            if buddy.oid is not None or buddy.size != node.size:
                break
            # Merge node with buddy
            buddy.unlink()
            if buddy.addr < node.addr: # buddy prevails
                del self.nodes[node.addr]
                node = buddy
            else: # node prevails
                del self.nodes[buddy.addr]
            node.size *= 2
        assert node is self.nodes[node.addr]
        node.linkbefore(self.avail[node.size])

    def dump(self, msg=""):
        if msg:
            print msg,
        size = MINSIZE
        blocks = bytes = 0
        while size <= self.cachelimit:
            head = self.avail[size]
            node = head.next
            count = 0
            while node is not head:
                count += 1
                node = node.next
            if count:
                print "%d:%d" % (size, count),
            blocks += count
            bytes += count*size
            size += size
        print "-- %d, %d" % (bytes, blocks)

def testbuddy():
    # Run one of Knuth's experiments as a test
    import random, heapq
    reportfreq = 100
    cachelimit = 2**17
    cache = BuddyAllocator(cachelimit)
    queue = []
    T = 0
    while T < 5000:
        while queue and queue[0][0] <= T:
            time, node = heapq.heappop(queue)
            assert time == T
            cache.free(node)
        size = random.randint(100, 2000)
        lifetime = random.randint(1, 100)
        node = cache.alloc(size)
        if node is None:
            print "out of mem"
            cache.dump("T=%d:" % T)
            break
        else:
            heapq.heappush(queue, (T + lifetime, node))
        T = T+1
        if T % reportfreq == 0:
            cache.dump("T=%d:" % T)

def hitrate(loads, hits):
    return "%5.1f%%" % (100.0 * hits / max(1, loads))

def duration(secs):

    mm, ss = divmod(secs, 60)
    hh, mm = divmod(mm, 60)
    if hh:
        return "%d:%02d:%02d" % (hh, mm, ss)
    if mm:
        return "%d:%02d" % (mm, ss)
    return "%d" % ss

if __name__ == "__main__":
    sys.exit(main())