Kirill Smelkov / Zope / Commits

Commit 643a9757
authored May 11, 2004 by root

Use tagged version of src/scripts

parent b0e88871

Showing 23 changed files with 4849 additions and 0 deletions
utilities/ZODBTools/README.txt               +146  -0
utilities/ZODBTools/analyze.py               +140  -0
utilities/ZODBTools/checkbtrees.py           +122  -0
utilities/ZODBTools/fsdump.py                  +9  -0
utilities/ZODBTools/fsrefs.py                +107  -0
utilities/ZODBTools/fsstats.py               +199  -0
utilities/ZODBTools/fstail.py                 +49  -0
utilities/ZODBTools/fstest.py                +228  -0
utilities/ZODBTools/migrate.py               +372  -0
utilities/ZODBTools/netspace.py              +121  -0
utilities/ZODBTools/parsezeolog.py           +135  -0
utilities/ZODBTools/repozo.py                +507  -0
utilities/ZODBTools/space.py                  +61  -0
utilities/ZODBTools/tests/test-checker.fs      +0  -0
utilities/ZODBTools/tests/testfstest.py      +182  -0
utilities/ZODBTools/tests/testzeopack.py     +115  -0
utilities/ZODBTools/timeout.py                +68  -0
utilities/ZODBTools/zeopack.py               +123  -0
utilities/ZODBTools/zeoqueue.py              +401  -0
utilities/ZODBTools/zeoreplay.py             +315  -0
utilities/ZODBTools/zeoserverlog.py          +538  -0
utilities/ZODBTools/zeoup.py                 +137  -0
utilities/ZODBTools/zodbload.py              +774  -0
utilities/ZODBTools/README.txt  0 → 100644
This directory contains a collection of utilities for managing ZODB
databases.  Some are more useful than others.  If you install ZODB
using distutils ("python setup.py install"), fsdump.py, fstest.py,
repozo.py, and zeopack.py will be installed in /usr/local/bin.

Unless otherwise noted, these scripts are invoked with the name of the
Data.fs file as their only argument.  Example: checkbtrees.py data.fs.

analyze.py -- a transaction analyzer for FileStorage

Reports on the data in a FileStorage.  The report is organized by
class.  It shows total data, as well as separate reports for current
and historical revisions of objects.

checkbtrees.py -- checks BTrees in a FileStorage for corruption

Attempts to find all the BTrees contained in a Data.fs, calls their
_check() methods, and runs them through BTrees.check.check().

fsdump.py -- summarize FileStorage contents, one line per revision

Prints a report of FileStorage contents, with one line for each
transaction and one line for each data record in that transaction.
Includes time stamps, file positions, and class names.

fstest.py -- simple consistency checker for FileStorage

usage: fstest.py [-v] data.fs

The fstest tool will scan all the data in a FileStorage and report an
error if it finds any corrupt transaction data.  The tool will print a
message when the first error is detected, then exit.

The tool accepts one or more -v arguments.  If a single -v is used, it
will print a line of text for each transaction record it encounters.
If two -v arguments are used, it will also print a line of text for
each object.  The objects for a transaction will be printed before the
transaction itself.

Note: It does not check the consistency of the object pickles.  It is
possible for the damage to occur only in the part of the file that
stores object pickles.  Those errors will go undetected.
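For example (assuming your FileStorage file is named data.fs):

    fstest.py -v -v data.fs

prints a line for each transaction record and for each object as it
scans.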
space.py -- report space used by objects in a FileStorage

usage: space.py [-v] data.fs

This ignores revisions and versions.

netspace.py -- hackish attempt to report on size of objects

usage: netspace.py [-P | -v] data.fs

-P: do a pack first
-v: print info for all objects, even if a traversal path isn't found

Traverses objects from the database root and attempts to calculate
size of object, including all reachable subobjects.
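For example (again assuming a FileStorage file named data.fs):

    netspace.py -v data.fs

reports on every object, whether or not a traversal path was found
for it.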
parsezeolog.py -- parse BLATHER logs from ZEO server

This script may be obsolete.  It has not been tested against the
current log output of the ZEO server.

Reports on the time and size of transactions committed by a ZEO
server, by inspecting log messages at BLATHER level.

repozo.py -- incremental backup utility for FileStorage

Run the script with the -h option to see usage details.
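As a sketch of typical use (the repository and file paths are
placeholders; see the script's usage text for the full option list):

    repozo.py -B -r /path/to/backups -f /path/to/Data.fs
    repozo.py -R -r /path/to/backups -o /path/to/Recovered.fs

The first command makes a backup (incremental when possible); the
second recovers the most recent backed-up state.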
timeout.py -- script to test transaction timeout

usage: timeout.py address delay [storage-name]

This script connects to a storage, begins a transaction, calls store()
and tpc_vote(), and then sleeps forever.  This should trigger the
transaction timeout feature of the server.
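For example, assuming a ZEO server listening on port 8100 of the local
host, and that the script accepts host:port addresses (check the
script if in doubt):

    timeout.py localhost:8100 60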
zeopack.py -- pack a ZEO server

The script connects to a server and calls pack() on a specific
storage.  See the script for usage details.

zeoreplay.py -- experimental script to replay transactions from a ZEO log

Like parsezeolog.py, this may be obsolete because it was written
against an earlier version of the ZEO server.  See the script for
usage details.

zeoup.py

usage: zeoup.py [options]

The test will connect to a ZEO server, load the root object, and
attempt to update the zeoup counter in the root.  It will report
success if it updates the counter or if it gets a ConflictError.  A
ConflictError is considered a success, because the client was able to
start a transaction.

See the script for details about the options.
zodbload.py -- exercise ZODB under a heavy synthesized Zope-like load

See the module docstring for details.  Note that this script requires
Zope.  New in ZODB3 3.1.4.

zeoserverlog.py -- analyze ZEO server log for performance statistics

See the module docstring for details; there are a large number of
options.  New in ZODB3 3.1.4.

fsrefs.py -- check FileStorage for dangling references

fstail.py -- display the most recent transactions in a FileStorage

usage: fstail.py [-n ntxn] data.fs

The most recent ntxn transactions are displayed on stdout.  The
optional argument -n specifies ntxn, which defaults to 10.
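For example:

    fstail.py -n 5 data.fs

displays the five most recent transactions.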
migrate.py -- do a storage migration and gather statistics

See the module docstring for details.

zeoqueue.py -- report number of clients currently waiting in the ZEO queue

See the module docstring for details.
utilities/ZODBTools/analyze.py  0 → 100755
#!/usr/bin/env python2.3
# Based on a transaction analyzer by Matt Kromer.
import pickle
import re
import sys
import types
from ZODB.FileStorage import FileStorage

class Report:
    def __init__(self):
        self.OIDMAP = {}
        self.TYPEMAP = {}
        self.TYPESIZE = {}
        self.FREEMAP = {}
        self.USEDMAP = {}
        self.TIDS = 0
        self.OIDS = 0
        self.DBYTES = 0
        self.COIDS = 0
        self.CBYTES = 0
        self.FOIDS = 0
        self.FBYTES = 0

def shorten(s, n):
    l = len(s)
    if l <= n:
        return s
    while len(s) + 3 > n: # account for ...
        i = s.find(".")
        if i == -1:
            # In the worst case, just return the rightmost n bytes
            return s[-n:]
        else:
            s = s[i + 1:]
            l = len(s)
    return "..." + s

def report(rep):
    print "Processed %d records in %d transactions" % (rep.OIDS, rep.TIDS)
    print "Average record size is %7.2f bytes" % (rep.DBYTES * 1.0 / rep.OIDS)
    print ("Average transaction size is %7.2f bytes" %
           (rep.DBYTES * 1.0 / rep.TIDS))

    print "Types used:"
    fmt = "%-46s %7s %9s %6s %7s"
    fmtp = "%-46s %7d %9d %5.1f%% %7.2f" # per-class format
    fmts = "%46s %7d %8dk %5.1f%% %7.2f" # summary format
    print fmt % ("Class Name", "Count", "TBytes", "Pct", "AvgSize")
    print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7)
    typemap = rep.TYPEMAP.keys()
    typemap.sort()
    cumpct = 0.0
    for t in typemap:
        pct = rep.TYPESIZE[t] * 100.0 / rep.DBYTES
        cumpct += pct
        print fmtp % (shorten(t, 46), rep.TYPEMAP[t], rep.TYPESIZE[t],
                      pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t])

    print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7)
    print "%46s %7d %9s %6s %6.2fk" % ('Total Transactions', rep.TIDS,
                                       ' ', ' ',
                                       rep.DBYTES * 1.0 / rep.TIDS / 1024.0)
    print fmts % ('Total Records', rep.OIDS, rep.DBYTES / 1024.0, cumpct,
                  rep.DBYTES * 1.0 / rep.OIDS)

    print fmts % ('Current Objects', rep.COIDS, rep.CBYTES / 1024.0,
                  rep.CBYTES * 100.0 / rep.DBYTES,
                  rep.CBYTES * 1.0 / rep.COIDS)
    if rep.FOIDS:
        print fmts % ('Old Objects', rep.FOIDS, rep.FBYTES / 1024.0,
                      rep.FBYTES * 100.0 / rep.DBYTES,
                      rep.FBYTES * 1.0 / rep.FOIDS)

def analyze(path):
    fs = FileStorage(path, read_only=1)
    fsi = fs.iterator()
    report = Report()
    while 1:
        try:
            transaction = fsi.next()
        except IndexError:
            break
        analyze_trans(report, transaction)
    return report

def analyze_trans(report, txn):
    report.TIDS += 1
    while 1:
        try:
            rec = txn.next()
        except IndexError:
            break
        analyze_rec(report, rec)

def get_type(record):
    try:
        classinfo = pickle.loads(record.data)[0]
    except SystemError, err:
        s = str(err)
        mo = re.match('Failed to import class (\S+) from module (\S+)', s)
        if mo is None:
            raise
        else:
            klass, mod = mo.group(1, 2)
            return "%s.%s" % (mod, klass)
    if isinstance(classinfo, types.TupleType):
        mod, klass = classinfo
        return "%s.%s" % (mod, klass)
    else:
        return str(classinfo)

def analyze_rec(report, record):
    oid = record.oid
    report.OIDS += 1
    try:
        size = len(record.data) # Ignores various overhead
        report.DBYTES += size
        if not report.OIDMAP.has_key(oid):
            type = get_type(record)
            report.OIDMAP[oid] = type
            report.USEDMAP[oid] = size
            report.COIDS += 1
            report.CBYTES += size
        else:
            type = report.OIDMAP[oid]
            fsize = report.USEDMAP[oid]
            report.FREEMAP[oid] = report.FREEMAP.get(oid, 0) + fsize
            report.USEDMAP[oid] = size
            report.FOIDS += 1
            report.FBYTES += fsize
            report.CBYTES += size - fsize
        report.TYPEMAP[type] = report.TYPEMAP.get(type, 0) + 1
        report.TYPESIZE[type] = report.TYPESIZE.get(type, 0) + size
    except Exception, err:
        print err

if __name__ == "__main__":
    path = sys.argv[1]
    report(analyze(path))
utilities/ZODBTools/checkbtrees.py  0 → 100755
#!/usr/bin/env python2.3
"""Check the consistency of BTrees in a Data.fs
usage: checkbtrees.py data.fs
Try to find all the BTrees in a Data.fs, call their _check() methods,
and run them through BTrees.check.check().
"""
from types import IntType

import ZODB
from ZODB.FileStorage import FileStorage
from BTrees.check import check

# Set of oids we've already visited.  Since the object structure is
# a general graph, this is needed to prevent unbounded paths in the
# presence of cycles.  It's also helpful in eliminating redundant
# checking when a BTree is pointed to by many objects.
oids_seen = {}

# Append (obj, path) to L if and only if obj is a persistent object
# and we haven't seen it before.
def add_if_new_persistent(L, obj, path):
    global oids_seen

    getattr(obj, '_', None) # unghostify
    if hasattr(obj, '_p_oid'):
        oid = obj._p_oid
        if not oids_seen.has_key(oid):
            L.append((obj, path))
            oids_seen[oid] = 1

def get_subobjects(obj):
    getattr(obj, '_', None) # unghostify
    sub = []
    try:
        attrs = obj.__dict__.items()
    except AttributeError:
        attrs = ()
    for pair in attrs:
        sub.append(pair)

    # what if it is a mapping?
    try:
        items = obj.items()
    except AttributeError:
        items = ()
    for k, v in items:
        if not isinstance(k, IntType):
            sub.append(("<key>", k))
        if not isinstance(v, IntType):
            sub.append(("[%s]" % repr(k), v))

    # what if it is a sequence?
    i = 0
    while 1:
        try:
            elt = obj[i]
        except:
            break
        sub.append(("[%d]" % i, elt))
        i += 1

    return sub

def main(fname):
    fs = FileStorage(fname, read_only=1)
    cn = ZODB.DB(fs).open()
    rt = cn.root()
    todo = []
    add_if_new_persistent(todo, rt, '')

    found = 0
    while todo:
        obj, path = todo.pop(0)
        found += 1
        if not path:
            print "<root>", repr(obj)
        else:
            print path, repr(obj)

        mod = str(obj.__class__.__module__)
        if mod.startswith("BTrees"):
            if hasattr(obj, "_check"):
                try:
                    obj._check()
                except AssertionError, msg:
                    print "*" * 60
                    print msg
                    print "*" * 60

            try:
                check(obj)
            except AssertionError, msg:
                print "*" * 60
                print msg
                print "*" * 60

        if found % 100 == 0:
            cn.cacheMinimize()

        for k, v in get_subobjects(obj):
            if k.startswith('['):
                # getitem
                newpath = "%s%s" % (path, k)
            else:
                newpath = "%s.%s" % (path, k)
            add_if_new_persistent(todo, v, newpath)

    print "total", len(fs._index), "found", found

if __name__ == "__main__":
    import sys
    try:
        fname, = sys.argv[1:]
    except:
        print __doc__
        sys.exit(2)

    main(fname)
utilities/ZODBTools/fsdump.py  0 → 100644
#!/usr/bin/env python2.3
"""Print a text summary of the contents of a FileStorage."""
from ZODB.FileStorage.fsdump import fsdump

if __name__ == "__main__":
    import sys
    fsdump(sys.argv[1])
utilities/ZODBTools/fsrefs.py  0 → 100644
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Check FileStorage for dangling references.
usage: fsrefs.py data.fs
This script ignores versions, which might produce incorrect results
for storages that use versions.
"""
from ZODB.FileStorage import FileStorage
from ZODB.TimeStamp import TimeStamp
from ZODB.utils import u64
from ZODB.FileStorage.fsdump import get_pickle_metadata

import cPickle
import cStringIO
import traceback
import types

VERBOSE = 0

def get_refs(pickle):
    refs = []
    f = cStringIO.StringIO(pickle)
    u = cPickle.Unpickler(f)
    u.persistent_load = refs
    u.noload()
    u.noload()
    return refs

def report(oid, data, serial, fs, missing):
    from_mod, from_class = get_pickle_metadata(data)
    if len(missing) > 1:
        plural = "s"
    else:
        plural = ""
    ts = TimeStamp(serial)
    print "oid %s %s.%s" % (hex(u64(oid)), from_mod, from_class)
    print "last updated: %s, tid=%s" % (ts, hex(u64(serial)))
    print "refers to invalid object%s:" % plural
    for oid, info, reason in missing:
        if isinstance(info, types.TupleType):
            description = "%s.%s" % info
        else:
            description = str(info)
        print "\toid %s %s: %r" % (hex(u64(oid)), reason, description)
    print

def main(path):
    fs = FileStorage(path, read_only=1)
    noload = {}
    for oid in fs._index.keys():
        try:
            data, serial = fs.load(oid, "")
        except:
            print "oid %s failed to load" % hex(u64(oid))
            if VERBOSE:
                traceback.print_exc()
            noload[oid] = 1

            # XXX If we get here after we've already loaded objects
            # that refer to this one, we won't get error reports from
            # them.  We could fix this by making two passes over the
            # storage, but that seems like overkill.

        refs = get_refs(data)
        missing = [] # contains 3-tuples of oid, klass-metadata, reason
        for info in refs:
            try:
                ref, klass = info
            except (ValueError, TypeError):
                # failed to unpack
                ref = info
                klass = '<unknown>'
            if not fs._index.has_key(ref):
                missing.append((ref, klass, "missing"))
            if noload.has_key(ref):
                missing.append((ref, klass, "failed to load"))
        if missing:
            report(oid, data, serial, fs, missing)

if __name__ == "__main__":
    import sys
    import getopt

    opts, args = getopt.getopt(sys.argv[1:], "v")
    for k, v in opts:
        if k == "-v":
            VERBOSE += 1

    path, = args
    main(path)
utilities/ZODBTools/fsstats.py  0 → 100755
#!/usr/bin/env python2.3
"""Print details statistics from fsdump output."""
import re
import sys

rx_txn = re.compile("tid=([0-9a-f]+).*size=(\d+)")
rx_data = re.compile("oid=([0-9a-f]+) class=(\S+) size=(\d+)")

def sort_byhsize(seq, reverse=False):
    L = [(v.size(), k, v) for k, v in seq]
    L.sort()
    if reverse:
        L.reverse()
    return [(k, v) for n, k, v in L]

class Histogram(dict):

    def add(self, size):
        self[size] = self.get(size, 0) + 1

    def size(self):
        return sum(self.itervalues())

    def mean(self):
        product = sum([k * v for k, v in self.iteritems()])
        return product / self.size()

    def median(self):
        # close enough?
        n = self.size() / 2
        L = self.keys()
        L.sort()
        L.reverse()
        while 1:
            k = L.pop()
            if self[k] > n:
                return k
            n -= self[k]

    def mode(self):
        mode = 0
        value = 0
        for k, v in self.iteritems():
            if v > value:
                value = v
                mode = k
        return mode

    def make_bins(self, binsize):
        maxkey = max(self.iterkeys())
        self.binsize = binsize
        self.bins = [0] * (1 + maxkey / binsize)
        for k, v in self.iteritems():
            b = k / binsize
            self.bins[b] += v

    def report(self, name, binsize=50, usebins=False, gaps=True, skip=True):
        if usebins:
            # Use existing bins with whatever size they have
            binsize = self.binsize
        else:
            # Make new bins
            self.make_bins(binsize)
        maxval = max(self.bins)
        # Print up to 40 dots for a value
        dot = max(maxval / 40, 1)
        tot = sum(self.bins)
        print name
        print "Total", tot,
        print "Median", self.median(),
        print "Mean", self.mean(),
        print "Mode", self.mode(),
        print "Max", max(self)
        print "One * represents", dot
        gap = False
        cum = 0
        for i, n in enumerate(self.bins):
            if gaps and (not n or (skip and not n / dot)):
                if not gap:
                    print "..."
                gap = True
                continue
            gap = False
            p = 100 * n / tot
            cum += n
            pc = 100 * cum / tot
            print "%6d %6d %3d%% %3d%% %s" % (
                i * binsize, n, p, pc, "*" * (n / dot))
        print

def class_detail(class_size):
    # summary of classes
    fmt = "%5s %6s %6s %6s %-50.50s"
    labels = ["num", "median", "mean", "mode", "class"]
    print fmt % tuple(labels)
    print fmt % tuple(["-" * len(s) for s in labels])
    for klass, h in sort_byhsize(class_size.iteritems()):
        print fmt % (h.size(), h.median(), h.mean(), h.mode(), klass)
    print

    # per class details
    for klass, h in sort_byhsize(class_size.iteritems(), reverse=True):
        h.make_bins(50)
        if len(filter(None, h.bins)) == 1:
            continue
        h.report("Object size for %s" % klass, usebins=True)

def revision_detail(lifetimes, classes):
    # Report per-class details for any object modified more than once
    for name, oids in classes.iteritems():
        h = Histogram()
        keep = False
        for oid in dict.fromkeys(oids, 1):
            L = lifetimes.get(oid)
            n = len(L)
            h.add(n)
            if n > 1:
                keep = True
        if keep:
            h.report("Number of revisions for %s" % name, binsize=10)

def main(path):
    txn_objects = Histogram() # histogram of txn size in objects
    txn_bytes = Histogram()   # histogram of txn size in bytes
    obj_size = Histogram()    # histogram of object size
    n_updates = Histogram()   # oid -> num updates
    n_classes = Histogram()   # class -> num objects
    lifetimes = {}            # oid -> list of tids
    class_size = {}           # class -> histogram of object size
    classes = {}              # class -> list of oids

    MAX = 0
    tid = None

    f = open(path, "rb")
    for i, line in enumerate(f):
        if MAX and i > MAX:
            break
        if line.startswith("data"):
            m = rx_data.search(line)
            if not m:
                continue
            oid, klass, size = m.groups()
            size = int(size)

            obj_size.add(size)
            n_updates.add(oid)
            n_classes.add(klass)

            h = class_size.get(klass)
            if h is None:
                h = class_size[klass] = Histogram()
            h.add(size)

            L = lifetimes.setdefault(oid, [])
            L.append(tid)

            L = classes.setdefault(klass, [])
            L.append(oid)
            objects += 1

        elif line.startswith("Trans"):
            if tid is not None:
                txn_objects.add(objects)
            m = rx_txn.search(line)
            if not m:
                continue
            tid, size = m.groups()
            size = int(size)
            objects = 0

            txn_bytes.add(size)
    f.close()

    print "Summary: %d txns, %d objects, %d revisions" % (
        txn_objects.size(), len(n_updates), n_updates.size())
    print

    txn_bytes.report("Transaction size (bytes)", binsize=1024)
    txn_objects.report("Transaction size (objects)", binsize=10)
    obj_size.report("Object size", binsize=128)

    # object lifetime info
    h = Histogram()
    for k, v in lifetimes.items():
        h.add(len(v))
    h.report("Number of revisions", binsize=10, skip=False)

    # details about revisions
    revision_detail(lifetimes, classes)

    class_detail(class_size)

if __name__ == "__main__":
    main(sys.argv[1])
utilities/ZODBTools/fstail.py  0 → 100644
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Tool to dump the last few transactions from a FileStorage."""
from ZODB.fstools import prev_txn

import binascii
import getopt
import sha
import sys

def main(path, ntxn):
    f = open(path, "rb")
    f.seek(0, 2)
    th = prev_txn(f)
    i = ntxn
    while th and i > 0:
        hash = sha.sha(th.get_raw_data()).digest()
        l = len(str(th.get_timestamp())) + 1
        th.read_meta()
        print "%s: hash=%s" % (th.get_timestamp(),
                               binascii.hexlify(hash))
        print ("user=%r description=%r length=%d"
               % (th.user, th.descr, th.length))
        print
        th = th.prev_txn()
        i -= 1

if __name__ == "__main__":
    ntxn = 10
    opts, args = getopt.getopt(sys.argv[1:], "n:")
    path, = args
    for k, v in opts:
        if k == '-n':
            ntxn = int(v)
    main(path, ntxn)
utilities/ZODBTools/fstest.py  0 → 100644
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Simple consistency checker for FileStorage.
usage: fstest.py [-v] data.fs
The fstest tool will scan all the data in a FileStorage and report an
error if it finds any corrupt transaction data. The tool will print a
message when the first error is detected, then exit.
The tool accepts one or more -v arguments. If a single -v is used, it
will print a line of text for each transaction record it encounters.
If two -v arguments are used, it will also print a line of text for
each object. The objects for a transaction will be printed before the
transaction itself.
Note: It does not check the consistency of the object pickles. It is
possible for the damage to occur only in the part of the file that
stores object pickles. Those errors will go undetected.
"""
# The implementation is based closely on the read_index() function in
# ZODB.FileStorage. If anything about the FileStorage layout changes,
# this file will need to be updated.
import string
import struct
import sys

class FormatError(ValueError):
    """There is a problem with the format of the FileStorage."""

class Status:
    checkpoint = 'c'
    undone = 'u'

packed_version = 'FS21'

TREC_HDR_LEN = 23
DREC_HDR_LEN = 42

# 2**32, as in ZODB.FileStorage.  The original script used t32 below
# without defining it; it is defined here so the el < 0 recovery path
# is self-contained.
t32 = 1L << 32

VERBOSE = 0

def hexify(s):
    """Format an 8-byte string as hex"""
    l = []
    for c in s:
        h = hex(ord(c))
        if h[:2] == '0x':
            h = h[2:]
        if len(h) == 1:
            l.append("0")
        l.append(h)
    return "0x" + string.join(l, '')

def chatter(msg, level=1):
    if VERBOSE >= level:
        sys.stdout.write(msg)

def U64(v):
    """Unpack an 8-byte string as a 64-bit long"""
    h, l = struct.unpack(">II", v)
    if h:
        return (h << 32) + l
    else:
        return l

def check(path):
    file = open(path, 'rb')

    file.seek(0, 2)
    file_size = file.tell()
    if file_size == 0:
        raise FormatError("empty file")
    file.seek(0)
    if file.read(4) != packed_version:
        raise FormatError("invalid file header")

    pos = 4L
    tid = '\000' * 8 # lowest possible tid to start
    i = 0
    while pos:
        _pos = pos
        pos, tid = check_trec(path, file, pos, tid, file_size)
        if tid is not None:
            chatter("%10d: transaction tid %s #%d \n" %
                    (_pos, hexify(tid), i))
            i = i + 1

def check_trec(path, file, pos, ltid, file_size):
    """Read an individual transaction record from file.

    Returns the pos of the next transaction and the transaction id.
    It also leaves the file pointer set to pos.  The path argument is
    used for generating error messages.
    """

    h = file.read(TREC_HDR_LEN)
    if not h:
        return None, None
    if len(h) != TREC_HDR_LEN:
        raise FormatError("%s truncated at %s" % (path, pos))

    tid, stl, status, ul, dl, el = struct.unpack(">8s8scHHH", h)
    if el < 0:
        el = t32 - el
    tmeta_len = TREC_HDR_LEN + ul + dl + el

    if tid <= ltid:
        raise FormatError("%s time-stamp reduction at %s: %s <= %s" %
                          (path, pos, hexify(tid), hexify(ltid)))
    ltid = tid

    tl = U64(stl) # transaction record length - 8
    if pos + tl + 8 > file_size:
        raise FormatError("%s truncated possibly because of"
                          " damaged records at %s" % (path, pos))
    if status == Status.checkpoint:
        raise FormatError("%s checkpoint flag was not cleared at %s"
                          % (path, pos))
    if status not in ' up':
        raise FormatError("%s has invalid status '%s' at %s" %
                          (path, status, pos))

    if tmeta_len > tl:
        raise FormatError("%s has an invalid transaction header"
                          " at %s" % (path, pos))

    tpos = pos
    tend = tpos + tl

    if status != Status.undone:
        pos = tpos + tmeta_len
        file.read(ul + dl + el) # skip transaction metadata

        i = 0
        while pos < tend:
            _pos = pos
            pos, oid = check_drec(path, file, pos, tpos, tid)
            if pos > tend:
                raise FormatError("%s has data records that extend beyond"
                                  " the transaction record; end at %s" %
                                  (path, pos))
            chatter("%10d: object oid %s #%d \n" % (_pos, hexify(oid), i),
                    level=2)
            i = i + 1

    file.seek(tend)
    rtl = file.read(8)
    if rtl != stl:
        raise FormatError("%s has inconsistent transaction length"
                          " for undone transaction at %s" % (path, pos))
    pos = tend + 8
    return pos, tid

def check_drec(path, file, pos, tpos, tid):
    """Check a data record for the current transaction record"""

    h = file.read(DREC_HDR_LEN)
    if len(h) != DREC_HDR_LEN:
        raise FormatError("%s truncated at %s" % (path, pos))
    oid, serial, _prev, _tloc, vlen, _plen = (
        struct.unpack(">8s8s8s8sH8s", h))
    prev = U64(_prev)
    tloc = U64(_tloc)
    plen = U64(_plen)
    dlen = DREC_HDR_LEN + (plen or 8)

    if vlen:
        dlen = dlen + 16 + vlen
        file.seek(8, 1)
        pv = U64(file.read(8))
        file.seek(vlen, 1) # skip the version data

    if tloc != tpos:
        raise FormatError("%s data record exceeds transaction record "
                          "at %s: tloc %d != tpos %d" %
                          (path, pos, tloc, tpos))

    pos = pos + dlen
    # XXX is the following code necessary?
    if plen:
        file.seek(plen, 1)
    else:
        file.seek(8, 1)
        # XXX _loadBack() ?

    return pos, oid

def usage():
    print __doc__
    sys.exit(-1)

if __name__ == "__main__":
    import getopt

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'v')
        if len(args) != 1:
            raise ValueError, "expected one argument"
        for k, v in opts:
            if k == '-v':
                VERBOSE = VERBOSE + 1
    except (getopt.error, ValueError):
        usage()

    try:
        check(args[0])
    except FormatError, msg:
        print msg
        sys.exit(-1)

    chatter("no errors detected")
utilities/ZODBTools/migrate.py  0 → 100755
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2001, 2002, 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""A script to gather statistics while doing a storage migration.
This is very similar to a standard storage's copyTransactionsFrom() method,
except that it's geared to run as a script, and it collects useful pieces of
information as it's working. This script can be used to stress test a storage
since it blasts transactions at it as fast as possible. You can get a good
sense of the performance of a storage by running this script.
Actually it just counts the size of pickles in the transaction via the
iterator protocol, so storage overheads aren't counted.
Usage: %(PROGRAM)s [options] [source-storage-args] [destination-storage-args]
Options:
-S sourcetype
--stype=sourcetype
This is the name of a recognized type for the source database. Use -T
to print out the known types. Defaults to "file".
-D desttype
--dtype=desttype
This is the name of the recognized type for the destination database.
Use -T to print out the known types. Defaults to "file".
-o filename
--output=filename
Print results in filename, otherwise stdout.
-m txncount
--max=txncount
Stop after committing txncount transactions.
-k txncount
--skip=txncount
Skip the first txncount transactions.
-p/--profile
Turn on specialized profiling.
-t/--timestamps
Print tids as timestamps.
-T/--storage_types
Print all the recognized storage types and exit.
-v/--verbose
Turns on verbose output. Multiple -v options increase the verbosity.
-h/--help
Print this message and exit.
Positional arguments:
source-storage-args:
Semicolon separated list of arguments for the source storage, as
key=val pairs. E.g. "file_name=Data.fs;read_only=1"
destination-storage-args:
Semicolon separated list of arguments for the destination storage, as
key=val pairs. E.g. "name=full;frequency=3600"
"""
import re
import sys
import time
import getopt
import marshal
import profile

from ZODB import utils
from ZODB import StorageTypes
from ZODB.TimeStamp import TimeStamp

PROGRAM = sys.argv[0]
ZERO = '\0' * 8

try:
    True, False
except NameError:
    True = 1
    False = 0

def usage(code, msg=''):
    print >> sys.stderr, __doc__ % globals()
    if msg:
        print >> sys.stderr, msg
    sys.exit(code)

def error(code, msg):
    print >> sys.stderr, msg
    print "use --help for usage message"
    sys.exit(code)

def main():
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hvo:pm:k:D:S:Tt',
            ['help', 'verbose', 'output=', 'profile', 'storage_types',
             'max=', 'skip=', 'dtype=', 'stype=', 'timestamps'])
    except getopt.error, msg:
        error(2, msg)

    class Options:
        stype = 'FileStorage'
        dtype = 'FileStorage'
        verbose = 0
        outfile = None
        profilep = False
        maxtxn = -1
        skiptxn = -1
        timestamps = False

    options = Options()

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-v', '--verbose'):
            options.verbose += 1
        elif opt in ('-T', '--storage_types'):
            print_types()
            sys.exit(0)
        elif opt in ('-S', '--stype'):
            options.stype = arg
        elif opt in ('-D', '--dtype'):
            options.dtype = arg
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--profile'):
            options.profilep = True
        elif opt in ('-m', '--max'):
            options.maxtxn = int(arg)
        elif opt in ('-k', '--skip'):
            options.skiptxn = int(arg)
        elif opt in ('-t', '--timestamps'):
            options.timestamps = True

    if len(args) > 2:
        error(2, "too many arguments")

    srckws = {}
    if len(args) > 0:
        srcargs = args[0]
        for kv in re.split(r';\s*', srcargs):
            key, val = kv.split('=')
            srckws[key] = val

    destkws = {}
    if len(args) > 1:
        destargs = args[1]
        for kv in re.split(r';\s*', destargs):
            key, val = kv.split('=')
            destkws[key] = val

    if options.stype not in StorageTypes.storage_types.keys():
        usage(2, 'Source database type must be provided')
    if options.dtype not in StorageTypes.storage_types.keys():
        usage(2, 'Destination database type must be provided')

    # Open the output file
    if options.outfile is None:
        options.outfp = sys.stdout
        options.outclosep = False
    else:
        options.outfp = open(options.outfile, 'w')
        options.outclosep = True

    if options.verbose > 0:
        print 'Opening source database...'
    modname, sconv = StorageTypes.storage_types[options.stype]
    kw = sconv(**srckws)
    __import__(modname)
    sclass = getattr(sys.modules[modname], options.stype)
    srcdb = sclass(**kw)

    if options.verbose > 0:
        print 'Opening destination database...'
    modname, dconv = StorageTypes.storage_types[options.dtype]
    kw = dconv(**destkws)
    __import__(modname)
    dclass = getattr(sys.modules[modname], options.dtype)
    dstdb = dclass(**kw)

    try:
        t0 = time.time()
        doit(srcdb, dstdb, options)
        t1 = time.time()
        if options.verbose > 0:
            print 'Migration time: %8.3f' % (t1-t0)
    finally:
        # Done
        srcdb.close()
        dstdb.close()
        if options.outclosep:
            options.outfp.close()

def doit(srcdb, dstdb, options):
    outfp = options.outfp
    profilep = options.profilep
    verbose = options.verbose
    # some global information
    largest_pickle = 0
    largest_txn_in_size = 0
    largest_txn_in_objects = 0
    total_pickle_size = 0L
    total_object_count = 0
    # Ripped from BaseStorage.copyTransactionsFrom()
    ts = None
    ok = True
    prevrevids = {}
    counter = 0
    skipper = 0
    if options.timestamps:
        print "%4s. %26s %6s %8s %5s %5s %5s %5s %5s" % (
            "NUM", "TID AS TIMESTAMP", "OBJS", "BYTES",
            # Does anybody know what these times mean?
            "t4-t0", "t1-t0", "t2-t1", "t3-t2", "t4-t3")
    else:
        print "%4s. %20s %6s %8s %6s %6s %6s %6s %6s" % (
            "NUM", "TRANSACTION ID", "OBJS", "BYTES",
            # Does anybody know what these times mean?
            "t4-t0", "t1-t0", "t2-t1", "t3-t2", "t4-t3")
    for txn in srcdb.iterator():
        skipper += 1
        if skipper <= options.skiptxn:
            continue
        counter += 1
        if counter > options.maxtxn >= 0:
            break
        tid = txn.tid
        if ts is None:
            ts = TimeStamp(tid)
        else:
            t = TimeStamp(tid)
            if t <= ts:
                if ok:
                    print >> sys.stderr, \
                          'Time stamps are out of order %s, %s' % (ts, t)
                ok = False
                ts = t.laterThan(ts)
                tid = `ts`
            else:
                ts = t
                if not ok:
                    print >> sys.stderr, \
                          'Time stamps are back in order %s' % t
                    ok = True
        if verbose > 1:
            print ts

        prof = None
        if profilep and (counter % 100) == 0:
            prof = profile.Profile()
        objects = 0
        size = 0
        newrevids = RevidAccumulator()
        t0 = time.time()
        dstdb.tpc_begin(txn, tid, txn.status)
        t1 = time.time()
        for r in txn:
            oid = r.oid
            objects += 1
            thissize = len(r.data)
            size += thissize
            if thissize > largest_pickle:
                largest_pickle = thissize
            if verbose > 1:
                if not r.version:
                    vstr = 'norev'
                else:
                    vstr = r.version
                print utils.U64(oid), vstr, len(r.data)
            oldrevid = prevrevids.get(oid, ZERO)
            result = dstdb.store(oid, oldrevid, r.data, r.version, txn)
            newrevids.store(oid, result)
        t2 = time.time()
        result = dstdb.tpc_vote(txn)
        t3 = time.time()
        newrevids.tpc_vote(result)
        prevrevids.update(newrevids.get_dict())
        # Profile every 100 transactions
        if prof:
            prof.runcall(dstdb.tpc_finish, txn)
        else:
            dstdb.tpc_finish(txn)
        t4 = time.time()

        # record the results
        if objects > largest_txn_in_objects:
            largest_txn_in_objects = objects
        if size > largest_txn_in_size:
            largest_txn_in_size = size
        if options.timestamps:
            tidstr = str(TimeStamp(tid))
            format = "%4d. %26s %6d %8d %5.3f %5.3f %5.3f %5.3f %5.3f"
        else:
            tidstr = utils.U64(tid)
            format = "%4d. %20s %6d %8d %6.4f %6.4f %6.4f %6.4f %6.4f"
        print >> outfp, format % (skipper, tidstr, objects, size,
                                  t4-t0, t1-t0, t2-t1, t3-t2, t4-t3)
        total_pickle_size += size
        total_object_count += objects

        if prof:
            prof.create_stats()
            fp = open('profile-%02d.txt' % (counter / 100), 'wb')
            marshal.dump(prof.stats, fp)
            fp.close()
    print >> outfp, "Largest pickle: %8d" % largest_pickle
    print >> outfp, "Largest transaction: %8d" % largest_txn_in_size
    print >> outfp, "Largest object count: %8d" % largest_txn_in_objects
    print >> outfp, "Total pickle size: %14d" % total_pickle_size
    print >> outfp, "Total object count: %8d" % total_object_count

# helper to deal with differences between old-style store() return and
# new-style store() return that supports ZEO
import types

class RevidAccumulator:

    def __init__(self):
        self.data = {}

    def _update_from_list(self, list):
        for oid, serial in list:
            if not isinstance(serial, types.StringType):
                raise serial
            self.data[oid] = serial

    def store(self, oid, result):
        if isinstance(result, types.StringType):
            self.data[oid] = result
        elif result is not None:
            self._update_from_list(result)

    def tpc_vote(self, result):
        if result is not None:
            self._update_from_list(result)

    def get_dict(self):
        return self.data

if __name__ == '__main__':
    main()
utilities/ZODBTools/netspace.py  0 → 100644
#!/usr/bin/env python2.3
"""Report on the net size of objects counting subobjects.
usage: netspace.py [-P | -v] data.fs
-P: do a pack first
-v: print info for all objects, even if a traversal path isn't found
"""
import ZODB
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata
from ZODB.referencesf import referencesf

def find_paths(root, maxdist):
    """Find Python attribute traversal paths for objects to maxdist distance.

    Starting at a root object, traverse attributes up to distance levels
    from the root, looking for persistent objects.  Return a dict
    mapping oids to traversal paths.

    XXX Assumes that the keys of the root are not themselves
    persistent objects.

    XXX Doesn't traverse containers.
    """
    paths = {}

    # Handle the root as a special case because it's a dict
    objs = []
    for k, v in root.items():
        oid = getattr(v, '_p_oid', None)
        objs.append((k, v, oid, 0))

    for path, obj, oid, dist in objs:
        if oid is not None:
            paths[oid] = path
        if dist < maxdist:
            getattr(obj, 'foo', None) # unghostify
            try:
                items = obj.__dict__.items()
            except AttributeError:
                continue
            for k, v in items:
                oid = getattr(v, '_p_oid', None)
                objs.append(("%s.%s" % (path, k), v, oid, dist + 1))

    return paths

def main(path):
    fs = FileStorage(path, read_only=1)
    if PACK:
        fs.pack()

    db = ZODB.DB(fs)
    rt = db.open().root()
    paths = find_paths(rt, 3)

    def total_size(oid):
        cache = {}
        cache_size = 1000
        def _total_size(oid, seen):
            v = cache.get(oid)
            if v is not None:
                return v
            data, serialno = fs.load(oid, '')
            size = len(data)
            for suboid in referencesf(data):
                if seen.has_key(suboid):
                    continue
                seen[suboid] = 1
                size += _total_size(suboid, seen)
            cache[oid] = size
            if len(cache) == cache_size:
                cache.popitem()
            return size
        return _total_size(oid, {})

    keys = fs._index.keys()
    keys.sort()
    keys.reverse()

    if not VERBOSE:
        # If not running verbosely, don't print an entry for an object
        # unless it has an entry in paths.
        keys = filter(paths.has_key, keys)

    fmt = "%8s %5d %8d %s %s.%s"

    for oid in keys:
        data, serialno = fs.load(oid, '')
        mod, klass = get_pickle_metadata(data)
        refs = referencesf(data)
        path = paths.get(oid, '-')
        print fmt % (U64(oid), len(data), total_size(oid), path, mod, klass)

if __name__ == "__main__":
    import sys
    import getopt

    PACK = 0
    VERBOSE = 0
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'Pv')
        path, = args
    except getopt.error, err:
        print err
        print __doc__
        sys.exit(2)
    except ValueError:
        print "expected one argument, got", len(args)
        print __doc__
        sys.exit(2)
    for o, v in opts:
        if o == '-P':
            PACK = 1
        if o == '-v':
            VERBOSE += 1
    main(path)
utilities/ZODBTools/parsezeolog.py  0 → 100644
#!/usr/bin/env python2.3
"""Parse the BLATHER logging generated by ZEO2.
An example of the log format is:
2002-04-15T13:05:29 BLATHER(-100) ZEO Server storea(3235680, [714], 235339406490168806) ('10.0.26.30', 45514)
"""
import re
import time

rx_time = re.compile('(\d\d\d\d-\d\d-\d\d)T(\d\d:\d\d:\d\d)')

def parse_time(line):
    """Return the time portion of a zLOG line in seconds or None."""
    mo = rx_time.match(line)
    if mo is None:
        return None
    date, time_ = mo.group(1, 2)
    date_l = [int(elt) for elt in date.split('-')]
    time_l = [int(elt) for elt in time_.split(':')]
    return int(time.mktime(date_l + time_l + [0, 0, 0]))

rx_meth = re.compile("zrpc:\d+ calling (\w+)\((.*)")

def parse_method(line):
    pass

def parse_line(line):
    """Parse a log entry and return time, method info, and client."""
    t = parse_time(line)
    if t is None:
        return None, None
    mo = rx_meth.search(line)
    if mo is None:
        return None, None
    meth_name = mo.group(1)
    meth_args = mo.group(2).strip()
    if meth_args.endswith(')'):
        meth_args = meth_args[:-1]
    meth_args = [s.strip() for s in meth_args.split(",")]
    m = meth_name, tuple(meth_args)
    return t, m

class TStats:

    counter = 1

    def __init__(self):
        self.id = TStats.counter
        TStats.counter += 1

    fields = ("time", "vote", "done", "user", "path")
    fmt = "%-24s %5s %5s %-15s %s"
    hdr = fmt % fields

    def report(self):
        """Print a report about the transaction"""
        t = time.ctime(self.begin)
        if hasattr(self, "vote"):
            d_vote = self.vote - self.begin
        else:
            d_vote = "*"
        if hasattr(self, "finish"):
            d_finish = self.finish - self.begin
        else:
            d_finish = "*"
        print self.fmt % (time.ctime(self.begin), d_vote, d_finish,
                          self.user, self.url)

class TransactionParser:

    def __init__(self):
        self.txns = {}
        self.skipped = 0

    def parse(self, line):
        t, m = parse_line(line)
        if t is None:
            return
        name = m[0]
        meth = getattr(self, name, None)
        if meth is not None:
            meth(t, m[1])

    def tpc_begin(self, time, args):
        t = TStats()
        t.begin = time
        t.user = args[1]
        t.url = args[2]
        t.objects = []
        tid = eval(args[0])
        self.txns[tid] = t

    def get_txn(self, args):
        tid = eval(args[0])
        try:
            return self.txns[tid]
        except KeyError:
            print "unknown tid", repr(tid)
            return None

    def tpc_finish(self, time, args):
        t = self.get_txn(args)
        if t is None:
            return
        t.finish = time

    def vote(self, time, args):
        t = self.get_txn(args)
        if t is None:
            return
        t.vote = time

    def get_txns(self):
        L = [(t.id, t) for t in self.txns.values()]
        L.sort()
        return [t for (id, t) in L]

if __name__ == "__main__":
    import fileinput

    p = TransactionParser()
    i = 0
    for line in fileinput.input():
        i += 1
        try:
            p.parse(line)
        except:
            print "line", i
            raise
    print "Transactions: %d" % len(p.txns)
    print TStats.hdr
    for txn in p.get_txns():
        txn.report()
utilities/ZODBTools/repozo.py  0 → 100755
#!/usr/bin/env python2.3
# repozo.py -- incremental and full backups of a Data.fs file.
#
# Originally written by Anthony Baxter
# Significantly modified by Barry Warsaw
"""repozo.py -- incremental and full backups of a Data.fs file.
Usage: %(program)s [options]
Where:
Exactly one of -B or -R must be specified:
-B / --backup
Backup current ZODB file.
-R / --recover
Restore a ZODB file from a backup.
-v / --verbose
Verbose mode.
-h / --help
Print this text and exit.
-r dir
--repository=dir
Repository directory containing the backup files. This argument
is required.
Options for -B/--backup:
-f file
--file=file
Source Data.fs file. This argument is required.
-F / --full
Force a full backup. By default, an incremental backup is made
if possible (e.g., if a pack has occurred since the last
incremental backup, a full backup is necessary).
-Q / --quick
Verify via md5 checksum only the last incremental written. This
significantly reduces the disk i/o at the (theoretical) cost of
inconsistency. This is a probabilistic way of determining whether
a full backup is necessary.
-z / --gzip
Compress with gzip the backup files. Uses the default zlib
compression level. By default, gzip compression is not used.
Options for -R/--recover:
-D str
--date=str
Recover state as of this date. str is in the format
yyyy-mm-dd[-hh[-mm]]
By default, current time is used.
-o filename
--output=filename
Write recovered ZODB to given file. By default, the file is
written to stdout.
"""
import
os
import
sys
import
md5
import
gzip
import
time
import
errno
import
getopt
from
ZODB.FileStorage
import
FileStorage
program
=
sys
.
argv
[
0
]
try
:
True
,
False
except
NameError
:
True
=
1
False
=
0
BACKUP
=
1
RECOVER
=
2
COMMASPACE
=
', '
READCHUNK
=
16
*
1024
VERBOSE
=
False
def
usage
(
code
,
msg
=
''
):
outfp
=
sys
.
stderr
if
code
==
0
:
outfp
=
sys
.
stdout
print
>>
outfp
,
__doc__
%
globals
()
if
msg
:
print
>>
outfp
,
msg
sys
.
exit
(
code
)
def
log
(
msg
,
*
args
):
if
VERBOSE
:
# Use stderr here so that -v flag works with -R and no -o
print
>>
sys
.
stderr
,
msg
%
args
def
parseargs
():
global
VERBOSE
try
:
opts
,
args
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
'BRvhf:r:FD:o:Qz'
,
[
'backup'
,
'recover'
,
'verbose'
,
'help'
,
'file='
,
'repository='
,
'full'
,
'date='
,
'output='
,
'quick'
,
'gzip'
])
except
getopt
.
error
,
msg
:
usage
(
1
,
msg
)
class
Options
:
mode
=
None
# BACKUP or RECOVER
file
=
None
# name of input Data.fs file
repository
=
None
# name of directory holding backups
full
=
False
# True forces full backup
date
=
None
# -D argument, if any
output
=
None
# where to write recovered data; None = stdout
quick
=
False
# -Q flag state
gzip
=
False
# -z flag state
options
=
Options
()
for
opt
,
arg
in
opts
:
if
opt
in
(
'-h'
,
'--help'
):
usage
(
0
)
elif
opt
in
(
'-v'
,
'--verbose'
):
VERBOSE
=
True
elif
opt
in
(
'-R'
,
'--recover'
):
if
options
.
mode
is
not
None
:
usage
(
1
,
'-B and -R are mutually exclusive'
)
options
.
mode
=
RECOVER
elif
opt
in
(
'-B'
,
'--backup'
):
if
options
.
mode
is
not
None
:
usage
(
1
,
'-B and -R are mutually exclusive'
)
options
.
mode
=
BACKUP
elif
opt
in
(
'-Q'
,
'--quick'
):
options
.
quick
=
True
elif
opt
in
(
'-f'
,
'--file'
):
options
.
file
=
arg
elif
opt
in
(
'-r'
,
'--repository'
):
options
.
repository
=
arg
elif
opt
in
(
'-F'
,
'--full'
):
options
.
full
=
True
elif
opt
in
(
'-D'
,
'--date'
):
options
.
date
=
arg
elif
opt
in
(
'-o'
,
'--output'
):
options
.
output
=
arg
elif
opt
in
(
'-z'
,
'--gzip'
):
options
.
gzip
=
True
else
:
assert
False
,
(
opt
,
arg
)
# Any other arguments are invalid
if
args
:
usage
(
1
,
'Invalid arguments: '
+
COMMASPACE
.
join
(
args
))
# Sanity checks
if
options
.
mode
is
None
:
usage
(
1
,
'Either --backup or --recover is required'
)
if
options
.
repository
is
None
:
usage
(
1
,
'--repository is required'
)
if
options
.
mode
==
BACKUP
:
if
options
.
date
is
not
None
:
log
(
'--date option is ignored in backup mode'
)
options
.
date
=
None
if
options
.
output
is
not
None
:
log
(
'--output option is ignored in backup mode'
)
options
.
output
=
None
else
:
assert
options
.
mode
==
RECOVER
if
options
.
file
is
not
None
:
log
(
'--file option is ignored in recover mode'
)
options
.
file
=
None
return
options
# Read bytes (no more than n, or to EOF if n is None) in chunks from the
# current position in file fp. Pass each chunk as an argument to func().
# Return the total number of bytes read == the total number of bytes
# passed in all to func(). Leaves the file position just after the
# last byte read.
def
dofile
(
func
,
fp
,
n
=
None
):
bytesread
=
0L
while
n
is
None
or
n
>
0
:
if
n
is
None
:
todo
=
READCHUNK
else
:
todo
=
min
(
READCHUNK
,
n
)
data
=
fp
.
read
(
todo
)
if
not
data
:
break
func
(
data
)
nread
=
len
(
data
)
bytesread
+=
nread
if
n
is
not
None
:
n
-=
nread
return
bytesread
def
checksum
(
fp
,
n
):
# Checksum the first n bytes of the specified file
sum
=
md5
.
new
()
def
func
(
data
):
sum
.
update
(
data
)
dofile
(
func
,
fp
,
n
)
return
sum
.
hexdigest
()
def
copyfile
(
options
,
dst
,
start
,
n
):
# Copy bytes from file src, to file dst, starting at offset start, for n
# length of bytes
sum
=
md5
.
new
()
ifp
=
open
(
options
.
file
,
'rb'
)
ifp
.
seek
(
start
)
if
options
.
gzip
:
ofp
=
gzip
.
open
(
dst
,
'wb'
)
else
:
ofp
=
open
(
dst
,
'wb'
)
def
func
(
data
):
sum
.
update
(
data
)
ofp
.
write
(
data
)
ndone
=
dofile
(
func
,
ifp
,
n
)
ofp
.
close
()
ifp
.
close
()
assert
ndone
==
n
return
sum
.
hexdigest
()
def
concat
(
files
,
ofp
=
None
):
# Concatenate a bunch of files from the repository, output to `outfile' if
# given. Return the number of bytes written and the md5 checksum of the
# bytes.
sum
=
md5
.
new
()
def
func
(
data
):
sum
.
update
(
data
)
if
ofp
:
ofp
.
write
(
data
)
bytesread
=
0
for
f
in
files
:
# Auto uncompress
if
f
.
endswith
(
'fsz'
):
ifp
=
gzip
.
open
(
f
,
'rb'
)
else
:
ifp
=
open
(
f
,
'rb'
)
bytesread
+=
dofile
(
func
,
ifp
)
ifp
.
close
()
if
ofp
:
ofp
.
close
()
return
bytesread
,
sum
.
hexdigest
()
def
gen_filename
(
options
,
ext
=
None
):
if
ext
is
None
:
if
options
.
full
:
ext
=
'.fs'
else
:
ext
=
'.deltafs'
if
options
.
gzip
:
ext
+=
'z'
t
=
time
.
gmtime
()[:
6
]
+
(
ext
,)
return
'%04d-%02d-%02d-%02d-%02d-%02d%s'
%
t
def
find_files
(
options
):
def
rootcmp
(
x
,
y
):
# This already compares in reverse order
return
cmp
(
os
.
path
.
splitext
(
y
)[
0
],
os
.
path
.
splitext
(
x
)[
0
])
# Return a list of files needed to reproduce state at time `when'
when
=
options
.
date
if
not
when
:
when
=
gen_filename
(
options
,
''
)
log
(
'looking for files b/w last full backup and %s...'
,
when
)
all
=
os
.
listdir
(
options
.
repository
)
all
.
sort
(
rootcmp
)
# Find the last full backup before date, then include all the incrementals
# between when and that full backup.
needed
=
[]
for
file
in
all
:
root
,
ext
=
os
.
path
.
splitext
(
file
)
if
root
<=
when
:
needed
.
append
(
file
)
if
ext
in
(
'.fs'
,
'.fsz'
):
break
# Make the file names relative to the repository directory
needed
=
[
os
.
path
.
join
(
options
.
repository
,
f
)
for
f
in
needed
]
# Restore back to chronological order
needed
.
reverse
()
if
needed
:
log
(
'files needed to recover state as of %s:'
,
when
)
for
f
in
needed
:
log
(
'
\
t
%s'
,
f
)
else
:
log
(
'no files found'
)
return
needed
# Scan the .dat file corresponding to the last full backup performed.
# Return
#
# filename, startpos, endpos, checksum
#
# of the last incremental. If there is no .dat file, or the .dat file
# is empty, return
#
# None, None, None, None
def
scandat
(
repofiles
):
fullfile
=
repofiles
[
0
]
datfile
=
os
.
path
.
splitext
(
fullfile
)[
0
]
+
'.dat'
fn
=
startpos
=
endpos
=
sum
=
None
# assume .dat file missing or empty
try
:
fp
=
open
(
datfile
)
except
IOError
,
e
:
if
e
.
errno
<>
errno
.
ENOENT
:
raise
else
:
# We only care about the last one.
lines
=
fp
.
readlines
()
fp
.
close
()
if
lines
:
fn
,
startpos
,
endpos
,
sum
=
lines
[
-
1
].
split
()
startpos
=
long
(
startpos
)
endpos
=
long
(
endpos
)
return
fn
,
startpos
,
endpos
,
sum
def
do_full_backup
(
options
):
# Find the file position of the last completed transaction.
fs
=
FileStorage
(
options
.
file
,
read_only
=
True
)
# Note that the FileStorage ctor calls read_index() which scans the file
# and returns "the position just after the last valid transaction record".
# getSize() then returns this position, which is exactly what we want,
# because we only want to copy stuff from the beginning of the file to the
# last valid transaction record.
pos
=
fs
.
getSize
()
fs
.
close
()
options
.
full
=
True
dest
=
os
.
path
.
join
(
options
.
repository
,
gen_filename
(
options
))
if
os
.
path
.
exists
(
dest
):
print
>>
sys
.
stderr
,
'Cannot overwrite existing file:'
,
dest
sys
.
exit
(
2
)
log
(
'writing full backup: %s bytes to %s'
,
pos
,
dest
)
sum
=
copyfile
(
options
,
dest
,
0
,
pos
)
# Write the data file for this full backup
datfile
=
os
.
path
.
splitext
(
dest
)[
0
]
+
'.dat'
fp
=
open
(
datfile
,
'w'
)
print
>>
fp
,
dest
,
0
,
pos
,
sum
fp
.
close
()
def
do_incremental_backup
(
options
,
reposz
,
repofiles
):
# Find the file position of the last completed transaction.
fs
=
FileStorage
(
options
.
file
,
read_only
=
True
)
# Note that the FileStorage ctor calls read_index() which scans the file
# and returns "the position just after the last valid transaction record".
# getSize() then returns this position, which is exactly what we want,
    # because we only want to copy stuff from the beginning of the file to the
    # last valid transaction record.
    pos = fs.getSize()
    fs.close()
    options.full = False
    dest = os.path.join(options.repository, gen_filename(options))
    if os.path.exists(dest):
        print >> sys.stderr, 'Cannot overwrite existing file:', dest
        sys.exit(2)
    log('writing incremental: %s bytes to %s', pos - reposz, dest)
    sum = copyfile(options, dest, reposz, pos - reposz)
    # The first file in repofiles points to the last full backup.  Use this to
    # get the .dat file and append the information for this incremental to
    # that file.
    fullfile = repofiles[0]
    datfile = os.path.splitext(fullfile)[0] + '.dat'
    # This .dat file better exist.  Let the exception percolate if not.
    fp = open(datfile, 'a')
    print >> fp, dest, reposz, pos, sum
    fp.close()


def do_backup(options):
    repofiles = find_files(options)
    # See if we need to do a full backup
    if options.full or not repofiles:
        log('doing a full backup')
        do_full_backup(options)
        return
    srcsz = os.path.getsize(options.file)
    if options.quick:
        fn, startpos, endpos, sum = scandat(repofiles)
        # If the .dat file was missing, or was empty, do a full backup
        if (fn, startpos, endpos, sum) == (None, None, None, None):
            log('missing or empty .dat file (full backup)')
            do_full_backup(options)
            return
        # Has the file shrunk, possibly because of a pack?
        if srcsz < endpos:
            log('file shrunk, possibly because of a pack (full backup)')
            do_full_backup(options)
            return
        # Now check the md5 sum of the source file, from the last
        # incremental's start and stop positions.
        srcfp = open(options.file, 'rb')
        srcfp.seek(startpos)
        srcsum = checksum(srcfp, endpos - startpos)
        srcfp.close()
        log('last incremental file: %s', fn)
        log('last incremental checksum: %s', sum)
        log('source checksum range: [%s..%s], sum: %s',
            startpos, endpos, srcsum)
        if sum == srcsum:
            if srcsz == endpos:
                log('No changes, nothing to do')
                return
            log('doing incremental, starting at: %s', endpos)
            do_incremental_backup(options, endpos, repofiles)
            return
    else:
        # This branch is much slower, and more disk i/o intensive, but it's
        # also more accurate since it checks the actual existing files
        # instead of the information in the .dat file.
        #
        # See if we can do an incremental, based on the files that already
        # exist.  This call of concat() will not write an output file.
        reposz, reposum = concat(repofiles)
        log('repository state: %s bytes, md5: %s', reposz, reposum)
        # Get the md5 checksum of the source file, up to two file positions:
        # the entire size of the file, and up to the file position of the
        # last incremental backup.
        srcfp = open(options.file, 'rb')
        srcsum = checksum(srcfp, srcsz)
        srcfp.seek(0)
        srcsum_backedup = checksum(srcfp, reposz)
        srcfp.close()
        log('current state   : %s bytes, md5: %s', srcsz, srcsum)
        log('backed up state : %s bytes, md5: %s', reposz, srcsum_backedup)
        # Has nothing changed?
        if srcsz == reposz and srcsum == reposum:
            log('No changes, nothing to do')
            return
        # Has the file shrunk, probably because of a pack?
        if srcsz < reposz:
            log('file shrunk, possibly because of a pack (full backup)')
            do_full_backup(options)
            return
        # The source file is larger than the repository.  If the md5
        # checksums match, then we know we can do an incremental backup.  If
        # they don't, then perhaps the file was packed at some point (or a
        # non-transactional undo was performed, but this is deprecated).
        # Only do a full backup if forced to.
        #
        # XXX For ZODB4, this needs to take into account the storage metadata
        # header that FileStorage has grown at the front of the file.
        if reposum == srcsum_backedup:
            log('doing incremental, starting at: %s', reposz)
            do_incremental_backup(options, reposz, repofiles)
            return
    # The checksums don't match, meaning the front of the source file has
    # changed.  We'll need to do a full backup in that case.
    log('file changed, possibly because of a pack (full backup)')
    do_full_backup(options)


def do_recover(options):
    # Find the first full backup at or before the specified date
    repofiles = find_files(options)
    if not repofiles:
        if options.date:
            log('No files in repository before %s', options.date)
        else:
            log('No files in repository')
        return
    if options.output is None:
        log('Recovering file to stdout')
        outfp = sys.stdout
    else:
        log('Recovering file to %s', options.output)
        outfp = open(options.output, 'wb')
    reposz, reposum = concat(repofiles, outfp)
    if outfp != sys.stdout:
        outfp.close()
    log('Recovered %s bytes, md5: %s', reposz, reposum)


def main():
    options = parseargs()
    if options.mode == BACKUP:
        do_backup(options)
    else:
        assert options.mode == RECOVER
        do_recover(options)


if __name__ == '__main__':
    main()
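Each incremental run above appends one whitespace-separated line to the full
backup's .dat file: the backup segment's path, the source-file byte range it
covers, and the md5 checksum of the copied bytes.  A minimal sketch of
reading that index back (the helper name read_dat is hypothetical, not part
of repozo.py):

    def read_dat(datfile):
        # Hypothetical helper: yield one (path, startpos, endpos, checksum)
        # tuple per line, in the order the backup segments were written.
        for line in open(datfile):
            path, start, end, checksum = line.split()
            yield path, int(start), int(end), checksum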
utilities/ZODBTools/space.py
#!/usr/bin/env python2.3
"""Report on the space used by objects in a storage.

usage: space.py data.fs

The current implementation only supports FileStorage.

Current limitations / simplifications: Ignores revisions and versions.
"""

from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata

def run(path, v=0):
    fs = FileStorage(path, read_only=1)
    # break into the file implementation
    if hasattr(fs._index, 'iterkeys'):
        iter = fs._index.iterkeys()
    else:
        iter = fs._index.keys()
    totals = {}
    for oid in iter:
        data, serialno = fs.load(oid, '')
        mod, klass = get_pickle_metadata(data)
        key = "%s.%s" % (mod, klass)
        bytes, count = totals.get(key, (0, 0))
        bytes += len(data)
        count += 1
        totals[key] = bytes, count
        if v:
            print "%8s %5d %s" % (U64(oid), len(data), key)
    L = totals.items()
    L.sort(lambda a, b: cmp(a[1], b[1]))
    L.reverse()
    print "Totals per object class:"
    for key, (bytes, count) in L:
        print "%8d %8d %s" % (count, bytes, key)

def main():
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "v")
    except getopt.error, msg:
        print msg
        print "usage: space.py [-v] Data.fs"
        sys.exit(2)
    if len(args) != 1:
        print "usage: space.py [-v] Data.fs"
        sys.exit(2)
    v = 0
    for o, a in opts:
        if o == "-v":
            v += 1
    path = args[0]
    run(path, v)

if __name__ == "__main__":
    main()
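run() can also be driven from an interactive session instead of the command
line; a sketch, assuming a Data.fs in the current directory and space.py on
PYTHONPATH:

    from space import run
    run('Data.fs', v=1)   # per-object detail plus the per-class totals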
utilities/ZODBTools/tests/test-checker.fs
File added
utilities/ZODBTools/tests/testfstest.py
"""Verify that fstest.py can find errors.
XXX To run this test script fstest.py must be on your PYTHONPATH.
"""
from
cStringIO
import
StringIO
import
os
import
re
import
struct
import
tempfile
import
unittest
import
fstest
from
fstest
import
FormatError
,
U64
class
TestCorruptedFS
(
unittest
.
TestCase
):
# XXX path?
f
=
open
(
'test-checker.fs'
,
'rb'
)
datafs
=
f
.
read
()
f
.
close
()
del
f
def
setUp
(
self
):
self
.
_temp
=
tempfile
.
mktemp
()
self
.
_file
=
open
(
self
.
_temp
,
'wb'
)
def
tearDown
(
self
):
if
not
self
.
_file
.
closed
:
self
.
_file
.
close
()
if
os
.
path
.
exists
(
self
.
_temp
):
try
:
os
.
remove
(
self
.
_temp
)
except
os
.
error
:
pass
def
noError
(
self
):
if
not
self
.
_file
.
closed
:
self
.
_file
.
close
()
fstest
.
check
(
self
.
_temp
)
def
detectsError
(
self
,
rx
):
if
not
self
.
_file
.
closed
:
self
.
_file
.
close
()
try
:
fstest
.
check
(
self
.
_temp
)
except
FormatError
,
msg
:
mo
=
re
.
search
(
rx
,
str
(
msg
))
self
.
failIf
(
mo
is
None
,
"unexpected error: %s"
%
msg
)
else
:
self
.
fail
(
"fstest did not detect corruption"
)
def
getHeader
(
self
):
buf
=
self
.
_datafs
.
read
(
16
)
if
not
buf
:
return
0
,
''
tl
=
U64
(
buf
[
8
:])
return
tl
,
buf
def
copyTransactions
(
self
,
n
):
"""Copy at most n transactions from the good data"""
f
=
self
.
_datafs
=
StringIO
(
self
.
datafs
)
self
.
_file
.
write
(
f
.
read
(
4
))
for
i
in
range
(
n
):
tl
,
data
=
self
.
getHeader
()
if
not
tl
:
return
self
.
_file
.
write
(
data
)
rec
=
f
.
read
(
tl
-
8
)
self
.
_file
.
write
(
rec
)
def
testGood
(
self
):
self
.
_file
.
write
(
self
.
datafs
)
self
.
noError
()
def
testTwoTransactions
(
self
):
self
.
copyTransactions
(
2
)
self
.
noError
()
def
testEmptyFile
(
self
):
self
.
detectsError
(
"empty file"
)
def
testInvalidHeader
(
self
):
self
.
_file
.
write
(
'SF12'
)
self
.
detectsError
(
"invalid file header"
)
def
testTruncatedTransaction
(
self
):
self
.
_file
.
write
(
self
.
datafs
[:
4
+
22
])
self
.
detectsError
(
"truncated"
)
def
testCheckpointFlag
(
self
):
self
.
copyTransactions
(
2
)
tl
,
data
=
self
.
getHeader
()
assert
tl
>
0
,
"ran out of good transaction data"
self
.
_file
.
write
(
data
)
self
.
_file
.
write
(
'c'
)
self
.
_file
.
write
(
self
.
_datafs
.
read
(
tl
-
9
))
self
.
detectsError
(
"checkpoint flag"
)
def
testInvalidStatus
(
self
):
self
.
copyTransactions
(
2
)
tl
,
data
=
self
.
getHeader
()
assert
tl
>
0
,
"ran out of good transaction data"
self
.
_file
.
write
(
data
)
self
.
_file
.
write
(
'Z'
)
self
.
_file
.
write
(
self
.
_datafs
.
read
(
tl
-
9
))
self
.
detectsError
(
"invalid status"
)
def
testTruncatedRecord
(
self
):
self
.
copyTransactions
(
3
)
tl
,
data
=
self
.
getHeader
()
assert
tl
>
0
,
"ran out of good transaction data"
self
.
_file
.
write
(
data
)
buf
=
self
.
_datafs
.
read
(
tl
/
2
)
self
.
_file
.
write
(
buf
)
self
.
detectsError
(
"truncated possibly"
)
def
testBadLength
(
self
):
self
.
copyTransactions
(
2
)
tl
,
data
=
self
.
getHeader
()
assert
tl
>
0
,
"ran out of good transaction data"
self
.
_file
.
write
(
data
)
buf
=
self
.
_datafs
.
read
(
tl
-
8
)
self
.
_file
.
write
(
buf
[
0
])
assert
tl
<=
1
<<
16
,
"can't use this transaction for this test"
self
.
_file
.
write
(
"
\
777
\
777
"
)
self
.
_file
.
write
(
buf
[
3
:])
self
.
detectsError
(
"invalid transaction header"
)
def
testDecreasingTimestamps
(
self
):
self
.
copyTransactions
(
0
)
tl
,
data
=
self
.
getHeader
()
buf
=
self
.
_datafs
.
read
(
tl
-
8
)
t1
=
data
+
buf
tl
,
data
=
self
.
getHeader
()
buf
=
self
.
_datafs
.
read
(
tl
-
8
)
t2
=
data
+
buf
self
.
_file
.
write
(
t2
[:
8
]
+
t1
[
8
:])
self
.
_file
.
write
(
t1
[:
8
]
+
t2
[
8
:])
self
.
detectsError
(
"time-stamp"
)
def
testTruncatedData
(
self
):
# This test must re-write the transaction header length in
# order to trigger the error in check_drec(). If it doesn't,
# the truncated data record would also caught a truncated
# transaction record.
self
.
copyTransactions
(
1
)
tl
,
data
=
self
.
getHeader
()
pos
=
self
.
_file
.
tell
()
self
.
_file
.
write
(
data
)
buf
=
self
.
_datafs
.
read
(
tl
-
8
)
hdr
=
buf
[:
15
]
ul
,
dl
,
el
=
struct
.
unpack
(
">HHH"
,
hdr
[
-
6
:])
self
.
_file
.
write
(
buf
[:
15
+
ul
+
dl
+
el
])
data
=
buf
[
15
+
ul
+
dl
+
el
:]
self
.
_file
.
write
(
data
[:
24
])
self
.
_file
.
seek
(
pos
+
8
,
0
)
newlen
=
struct
.
pack
(
">II"
,
0
,
tl
-
(
len
(
data
)
-
24
))
self
.
_file
.
write
(
newlen
)
self
.
detectsError
(
"truncated at"
)
def
testBadDataLength
(
self
):
self
.
copyTransactions
(
1
)
tl
,
data
=
self
.
getHeader
()
self
.
_file
.
write
(
data
)
buf
=
self
.
_datafs
.
read
(
tl
-
8
)
hdr
=
buf
[:
7
]
# write the transaction meta data
ul
,
dl
,
el
=
struct
.
unpack
(
">HHH"
,
hdr
[
-
6
:])
self
.
_file
.
write
(
buf
[:
7
+
ul
+
dl
+
el
])
# write the first part of the data header
data
=
buf
[
7
+
ul
+
dl
+
el
:]
self
.
_file
.
write
(
data
[:
24
])
self
.
_file
.
write
(
"
\
000
"
*
4
+
"
\
077
"
+
"
\
000
"
*
3
)
self
.
_file
.
write
(
data
[
32
:])
self
.
detectsError
(
"record exceeds transaction"
)
if
__name__
==
"__main__"
:
unittest
.
main
()
utilities/ZODBTools/tests/testzeopack.py
# Some simple tests for zeopack.py
# For this to work, zeopack.py must be on your PATH.

from ZODB.FileStorage import FileStorage
from ZODB.tests.StorageTestBase import StorageTestBase
from ZEO.tests import forker
import ZODB

import os
import socket
import tempfile
import threading
import time
import unittest

# XXX The forker interface isn't clearly defined.  It's different on
# different branches of ZEO.  This will break someday.

# XXX Only handle the Unix variant of the forker.  Just to give Tim
# something to do.

class PackerTests(StorageTestBase):

    def setUp(self):
        self.started = 0

    def start(self):
        self.started = 1
        self.path = tempfile.mktemp(suffix=".fs")
        self._storage = FileStorage(self.path)
        self.db = ZODB.DB(self._storage)
        self.do_updates()
        self.pid, self.exit = forker.start_zeo_server(self._storage,
                                                      self.addr)

    def do_updates(self):
        for i in range(100):
            self._dostore()

    def tearDown(self):
        if not self.started:
            return
        self.db.close()
        self._storage.close()
        self.exit.close()
        try:
            os.kill(self.pid, 9)
        except os.error:
            pass
        try:
            os.waitpid(self.pid, 0)
        except os.error, err:
            ##print "waitpid failed", err
            pass
        for ext in '', '.old', '.lock', '.index', '.tmp':
            path = self.path + ext
            try:
                os.remove(path)
            except os.error:
                pass

    def set_inet_addr(self):
        self.host = socket.gethostname()
        self.port = forker.get_port()
        self.addr = self.host, self.port

    def testPack(self):
        self.set_inet_addr()
        self.start()
        status = os.system("zeopack.py -h %s -p %s" % (self.host, self.port))
        assert status == 0
        assert os.path.exists(self.path + ".old")

    def testPackDays(self):
        self.set_inet_addr()
        self.start()
        status = os.system("zeopack.py -h %s -p %s -d 1" % (self.host,
                                                            self.port))
        # Since we specified one day, nothing should get packed
        assert status == 0
        assert not os.path.exists(self.path + ".old")

    def testAF_UNIXPack(self):
        self.addr = tempfile.mktemp(suffix=".zeo-socket")
        self.start()
        status = os.system("zeopack.py -U %s" % self.addr)
        assert status == 0
        assert os.path.exists(self.path + ".old")

    def testNoServer(self):
        status = os.system("zeopack.py -p 19")
        assert status != 0

    def testWaitForServer(self):
        self.set_inet_addr()
        def delayed_start():
            time.sleep(11)
            self.start()
        t = threading.Thread(target=delayed_start)
        t.start()
        status = os.system("zeopack.py -h %s -p %s -W" % (self.host,
                                                          self.port))
        t.join()
        assert status == 0
        assert os.path.exists(self.path + ".old")

class UpTest(unittest.TestCase):

    def testUp(self):
        status = os.system("zeoup.py -p 19")
        # There is no ZEO server on port 19, so we should see non-zero
        # exit status.
        assert status != 0

if __name__ == "__main__":
    unittest.main()
utilities/ZODBTools/timeout.py
0 → 100755
View file @
643a9757
#!/usr/bin/env python2.3
"""Transaction timeout test script.

This script connects to a storage, begins a transaction, calls store()
and tpc_vote(), and then sleeps forever.  This should trigger the
transaction timeout feature of the server.

usage: timeout.py address delay [storage-name]
"""

import sys
import time

from ZODB.Transaction import Transaction
from ZODB.tests.MinPO import MinPO
from ZODB.tests.StorageTestBase import zodb_pickle
from ZEO.ClientStorage import ClientStorage

ZERO = '\0' * 8

def main():
    if len(sys.argv) not in (3, 4):
        sys.stderr.write("Usage: %s address delay [storage-name]\n" %
                         sys.argv[0])
        sys.exit(2)

    hostport = sys.argv[1]
    delay = float(sys.argv[2])
    if sys.argv[3:]:
        name = sys.argv[3]
    else:
        name = "1"

    if "/" in hostport:
        address = hostport
    else:
        if ":" in hostport:
            i = hostport.index(":")
            host, port = hostport[:i], hostport[i + 1:]
        else:
            host, port = "", hostport
        port = int(port)
        address = (host, port)

    print "Connecting to %s..." % repr(address)
    storage = ClientStorage(address, name)
    print "Connected.  Now starting a transaction..."

    oid = storage.new_oid()
    version = ""
    revid = ZERO
    data = MinPO("timeout.py")
    pickled_data = zodb_pickle(data)
    t = Transaction()
    t.user = "timeout.py"
    storage.tpc_begin(t)
    storage.store(oid, revid, pickled_data, version, t)
    print "Stored.  Now voting..."
    storage.tpc_vote(t)

    print "Voted; now sleeping %s..." % delay
    time.sleep(delay)
    print "Done."

if __name__ == "__main__":
    main()
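An illustrative invocation, assuming a ZEO server listening on port 9999 of
the local host; the 300-second delay is an arbitrary example:

    timeout.py localhost:9999 300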
utilities/ZODBTools/zeopack.py
0 → 100755
View file @
643a9757
#!/usr/bin/env python2.3
"""Connect to a ZEO server and ask it to pack.

Usage: zeopack.py [options]

Options:

    -p port -- port to connect to

    -h host -- host to connect to (default is current host)

    -U path -- Unix-domain socket to connect to

    -S name -- storage name (default is '1')

    -d days -- pack objects more than days old

    -1 -- Connect to a ZEO 1 server

    -W -- wait for server to come up.  Normally the script tries to
       connect for 10 seconds, then exits with an error.  The -W
       option is only supported with ZEO 1.

You must specify either -p and -h or -U.
"""

import getopt
import socket
import sys
import time

from ZEO.ClientStorage import ClientStorage

WAIT = 10  # wait no more than 10 seconds for client to connect

def connect(storage):
    # The connect-on-startup logic that ZEO provides isn't too useful
    # for this script.  We'd like the client to attempt to startup, but
    # fail if it can't get through to the server after a reasonable
    # amount of time.  There's no external support for this, so we'll
    # expose the ZEO 1.0 internals.  (consenting adults only)
    t0 = time.time()
    while t0 + WAIT > time.time():
        storage._call.connect()
        if storage._connected:
            return
    raise RuntimeError, "Unable to connect to ZEO server"

def pack1(addr, storage, days, wait):
    cs = ClientStorage(addr, storage=storage,
                       wait_for_server_on_startup=wait)
    if wait:
        # _startup() is an artifact of the way ZEO 1.0 works.  The
        # ClientStorage doesn't get fully initialized until registerDB()
        # is called.  The only thing we care about, though, is that
        # registerDB() calls _startup().
        cs._startup()
    else:
        connect(cs)
    cs.invalidator = None
    cs.pack(wait=1, days=days)
    cs.close()

def pack2(addr, storage, days):
    cs = ClientStorage(addr, storage=storage, wait=1, read_only=1)
    cs.pack(wait=1, days=days)
    cs.close()

def usage(exit=1):
    print __doc__
    print " ".join(sys.argv)
    sys.exit(exit)

def main():
    host = None
    port = None
    unix = None
    storage = '1'
    days = 0
    wait = 0
    zeoversion = 2
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'p:h:U:S:d:W1')
        for o, a in opts:
            if o == '-p':
                port = int(a)
            elif o == '-h':
                host = a
            elif o == '-U':
                unix = a
            elif o == '-S':
                storage = a
            elif o == '-d':
                days = int(a)
            elif o == '-W':
                wait = 1
            elif o == '-1':
                zeoversion = 1
    except Exception, err:
        print err
        usage()

    if unix is not None:
        addr = unix
    else:
        if host is None:
            host = socket.gethostname()
        if port is None:
            usage()
        addr = host, port

    if zeoversion == 1:
        pack1(addr, storage, days, wait)
    else:
        pack2(addr, storage, days)

if __name__ == "__main__":
    try:
        main()
    except Exception, err:
        print err
        sys.exit(1)
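An illustrative invocation against a ZEO 2 server, packing away history
older than a week (host and port are examples):

    zeopack.py -h zeohost -p 9999 -d 7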
utilities/ZODBTools/zeoqueue.py
0 → 100755
View file @
643a9757
#!/usr/bin/env python2.3
"""Report on the number of currently waiting clients in the ZEO queue.

Usage: %(PROGRAM)s [options] logfile

Options:
    -h / --help
        Print this help text and exit.

    -v / --verbose
        Verbose output

    -f file
    --file file
        Use the specified file to store the incremental state as a pickle.
        If not given, %(STATEFILE)s is used.

    -r / --reset
        Reset the state of the tool.  This blows away any existing state
        pickle file and then exits -- it does not parse the file.  Use this
        when you rotate log files so that the next run will parse from the
        beginning of the file.
"""

import os
import re
import sys
import time
import errno
import getopt
import cPickle as pickle

COMMASPACE = ', '
STATEFILE = 'zeoqueue.pck'
PROGRAM = sys.argv[0]

try:
    True, False
except NameError:
    True = 1
    False = 0


tcre = re.compile(r"""
    (?P<ymd>
    \d{4}-      # year
    \d{2}-      # month
    \d{2})      # day
    T           # separator
    (?P<hms>
    \d{2}:      # hour
    \d{2}:      # minute
    \d{2})      # second
    """, re.VERBOSE)

ccre = re.compile(r"""
    zrpc-conn:(?P<addr>\d+.\d+.\d+.\d+:\d+)\s+
    calling\s+
    (?P<method>\w+)     # the method
    \(                  # args open paren
    \'                  # string quote start
    (?P<tid>\S+)        # first argument -- usually the tid
    \'                  # end of string
    (?P<rest>.*)        # rest of line
    """, re.VERBOSE)

wcre = re.compile(r'Clients waiting: (?P<num>\d+)')


def parse_time(line):
    """Return the time portion of a zLOG line in seconds or None."""
    mo = tcre.match(line)
    if mo is None:
        return None
    date, time_ = mo.group('ymd', 'hms')
    date_l = [int(elt) for elt in date.split('-')]
    time_l = [int(elt) for elt in time_.split(':')]
    return int(time.mktime(date_l + time_l + [0, 0, 0]))


class Txn:
    """Track status of single transaction."""
    def __init__(self, tid):
        self.tid = tid
        self.hint = None
        self.begin = None
        self.vote = None
        self.abort = None
        self.finish = None
        self.voters = []

    def isactive(self):
        if self.begin and not (self.abort or self.finish):
            return True
        else:
            return False


class Status:
    """Track status of ZEO server by replaying log records.

    We want to keep track of several events:

    - The last committed transaction.

    - The last committed or aborted transaction.

    - The last transaction that got the lock but didn't finish.

    - The client address doing the first vote of a transaction.

    - The number of currently active transactions.

    - The number of reported queued transactions.

    - Client restarts.

    - Number of current connections.  XXX (This might not be useful.)

    We can observe these events by reading the following sorts of log
    entries:

    2002-12-16T06:16:05 BLATHER(-100) zrpc:12649 calling
    tpc_begin('\x03I\x90((\xdbp\xd5', '', 'QueueCatal...

    2002-12-16T06:16:06 BLATHER(-100) zrpc:12649 calling
    vote('\x03I\x90((\xdbp\xd5')

    2002-12-16T06:16:06 BLATHER(-100) zrpc:12649 calling
    tpc_finish('\x03I\x90((\xdbp\xd5')

    2002-12-16T10:46:10 INFO(0) ZSS:12649:1 Transaction blocked waiting
    for storage. Clients waiting: 1.

    2002-12-16T06:15:57 BLATHER(-100) zrpc:12649 connect from
    ('10.0.26.54', 48983): <ManagedServerConnection ('10.0.26.54', 48983)>

    2002-12-16T10:30:09 INFO(0) ZSS:12649:1 disconnected
    """

    def __init__(self):
        self.lineno = 0
        self.pos = 0
        self.reset()

    def reset(self):
        self.commit = None
        self.commit_or_abort = None
        self.last_unfinished = None
        self.n_active = 0
        self.n_blocked = 0
        self.n_conns = 0
        self.t_restart = None
        self.txns = {}

    def iscomplete(self):
        # The status report will always be complete if we encounter an
        # explicit restart.
        if self.t_restart is not None:
            return True
        # If we haven't seen a restart, assume that seeing a finished
        # transaction is good enough.
        return self.commit is not None

    def process_file(self, fp):
        if self.pos:
            if VERBOSE:
                print 'seeking to file position', self.pos
            fp.seek(self.pos)
        while True:
            line = fp.readline()
            if not line:
                break
            self.lineno += 1
            self.process(line)
        self.pos = fp.tell()

    def process(self, line):
        if line.find("calling") != -1:
            self.process_call(line)
        elif line.find("connect") != -1:
            self.process_connect(line)
        # test for "locked" because word may start with "B" or "b"
        elif line.find("locked") != -1:
            self.process_block(line)
        elif line.find("Starting") != -1:
            self.process_start(line)

    def process_call(self, line):
        mo = ccre.search(line)
        if mo is None:
            return
        called_method = mo.group('method')
        # XXX exit earlier if we've got zeoLoad, because it's the most
        # frequently called method and we don't use it.
        if called_method == "zeoLoad":
            return
        t = parse_time(line)
        meth = getattr(self, "call_%s" % called_method, None)
        if meth is None:
            return
        client = mo.group('addr')
        tid = mo.group('tid')
        rest = mo.group('rest')
        meth(t, client, tid, rest)

    def process_connect(self, line):
        pass

    def process_block(self, line):
        mo = wcre.search(line)
        if mo is None:
            # assume that this was a restart message for the last blocked
            # transaction.
            self.n_blocked = 0
        else:
            self.n_blocked = int(mo.group('num'))

    def process_start(self, line):
        if line.find("Starting ZEO server") != -1:
            self.reset()
            self.t_restart = parse_time(line)

    def call_tpc_begin(self, t, client, tid, rest):
        txn = Txn(tid)
        txn.begin = t
        if rest[0] == ',':
            i = 1
            while rest[i].isspace():
                i += 1
            rest = rest[i:]
        txn.hint = rest
        self.txns[tid] = txn
        self.n_active += 1
        self.last_unfinished = txn

    def call_vote(self, t, client, tid, rest):
        txn = self.txns.get(tid)
        if txn is None:
            print "Oops!"
            txn = self.txns[tid] = Txn(tid)
        txn.vote = t
        txn.voters.append(client)

    def call_tpc_abort(self, t, client, tid, rest):
        txn = self.txns.get(tid)
        if txn is None:
            print "Oops!"
            txn = self.txns[tid] = Txn(tid)
        txn.abort = t
        txn.voters = []
        self.n_active -= 1
        if self.commit_or_abort:
            # delete the old transaction
            try:
                del self.txns[self.commit_or_abort.tid]
            except KeyError:
                pass
        self.commit_or_abort = txn

    def call_tpc_finish(self, t, client, tid, rest):
        txn = self.txns.get(tid)
        if txn is None:
            print "Oops!"
            txn = self.txns[tid] = Txn(tid)
        txn.finish = t
        txn.voters = []
        self.n_active -= 1
        if self.commit:
            # delete the old transaction
            try:
                del self.txns[self.commit.tid]
            except KeyError:
                pass
        if self.commit_or_abort:
            # delete the old transaction
            try:
                del self.txns[self.commit_or_abort.tid]
            except KeyError:
                pass
        self.commit = self.commit_or_abort = txn

    def report(self):
        print "Blocked transactions:", self.n_blocked
        if not VERBOSE:
            return
        if self.t_restart:
            print "Server started:", time.ctime(self.t_restart)
        if self.commit is not None:
            t = self.commit_or_abort.finish
            if t is None:
                t = self.commit_or_abort.abort
            print "Last finished transaction:", time.ctime(t)
        # the blocked transaction should be the first one that calls vote
        L = [(txn.begin, txn) for txn in self.txns.values()]
        L.sort()
        for x, txn in L:
            if txn.isactive():
                began = txn.begin
                if txn.voters:
                    print "Blocked client (first vote):", txn.voters[0]
                print "Blocked transaction began at:", time.ctime(began)
                print "Hint:", txn.hint
                print "Idle time: %d sec" % int(time.time() - began)
                break


def usage(code, msg=''):
    print >> sys.stderr, __doc__ % globals()
    if msg:
        print >> sys.stderr, msg
    sys.exit(code)


def main():
    global VERBOSE

    VERBOSE = 0
    file = STATEFILE
    reset = False
    # -0 is a secret option used for testing purposes only
    seek = True
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'vhf:r0',
                                   ['help', 'verbose', 'file=', 'reset'])
    except getopt.error, msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-v', '--verbose'):
            VERBOSE += 1
        elif opt in ('-f', '--file'):
            file = arg
        elif opt in ('-r', '--reset'):
            reset = True
        elif opt == '-0':
            seek = False

    if reset:
        # Blow away the existing state file and exit
        try:
            os.unlink(file)
            if VERBOSE:
                print 'removing pickle state file', file
        except OSError, e:
            if e.errno != errno.ENOENT:
                raise
        return

    if not args:
        usage(1, 'logfile is required')
    if len(args) > 1:
        usage(1, 'too many arguments: %s' % COMMASPACE.join(args))
    path = args[0]

    # Get the previous status object from the pickle file, if it is available
    # and if the --reset flag wasn't given.
    status = None
    try:
        statefp = open(file, 'rb')
        try:
            status = pickle.load(statefp)
            if VERBOSE:
                print 'reading status from file', file
        finally:
            statefp.close()
    except IOError, e:
        if e.errno != errno.ENOENT:
            raise
    if status is None:
        status = Status()
        if VERBOSE:
            print 'using new status'

    if not seek:
        status.pos = 0

    fp = open(path, 'rb')
    try:
        status.process_file(fp)
    finally:
        fp.close()
    # Save state
    statefp = open(file, 'wb')
    pickle.dump(status, statefp, 1)
    statefp.close()
    # Print the report and return the number of blocked clients in the exit
    # status code.
    status.report()
    sys.exit(status.n_blocked)


if __name__ == "__main__":
    main()
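Because the file position of the last parse is pickled into the state file,
the intended workflow is to rerun the script as the log grows and to reset
when the log is rotated; an illustrative sequence (paths are examples):

    zeoqueue.py /var/log/zeo.log   # parse new records; exit status = blocked clients
    zeoqueue.py -r                 # after log rotation, forget the saved position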
utilities/ZODBTools/zeoreplay.py
#!/usr/bin/env python2.3
"""Parse the BLATHER logging generated by ZEO, and optionally replay it.

Usage: zeointervals.py [options]

Options:

    --help / -h
        Print this message and exit.

    --replay=storage
    -r storage
        Replay the parsed transactions through the new storage

    --maxtxn=count
    -m count
        Parse no more than count transactions.

    --report / -p
        Print a report as we're parsing.

Unlike parsezeolog.py, this script generates timestamps for each transaction,
and sub-command in the transaction.  We can use this to compare timings with
synthesized data.
"""

import re
import sys
import time
import getopt
import operator
# ZEO logs measure wall-clock time so for consistency we need to do the same
#from time import clock as now
from time import time as now

from ZODB.FileStorage import FileStorage
#from BDBStorage.BDBFullStorage import BDBFullStorage
#from Standby.primary import PrimaryStorage
#from Standby.config import RS_PORT
from ZODB.Transaction import Transaction
from ZODB.utils import p64

datecre = re.compile('(\d\d\d\d-\d\d-\d\d)T(\d\d:\d\d:\d\d)')
methcre = re.compile("ZEO Server (\w+)\((.*)\) \('(.*)', (\d+)")

class StopParsing(Exception):
    pass

def usage(code, msg=''):
    print __doc__
    if msg:
        print msg
    sys.exit(code)

def parse_time(line):
    """Return the time portion of a zLOG line in seconds or None."""
    mo = datecre.match(line)
    if mo is None:
        return None
    date, time_ = mo.group(1, 2)
    date_l = [int(elt) for elt in date.split('-')]
    time_l = [int(elt) for elt in time_.split(':')]
    return int(time.mktime(date_l + time_l + [0, 0, 0]))

def parse_line(line):
    """Parse a log entry and return time, method info, and client."""
    t = parse_time(line)
    if t is None:
        return None, None, None
    mo = methcre.search(line)
    if mo is None:
        return None, None, None
    meth_name = mo.group(1)
    meth_args = mo.group(2)
    meth_args = [s.strip() for s in meth_args.split(',')]
    m = meth_name, tuple(meth_args)
    c = mo.group(3), mo.group(4)
    return t, m, c

class StoreStat:
    def __init__(self, when, oid, size):
        self.when = when
        self.oid = oid
        self.size = size

    # Crufty
    def __getitem__(self, i):
        if i == 0: return self.oid
        if i == 1: return self.size
        raise IndexError

class TxnStat:
    def __init__(self):
        self._begintime = None
        self._finishtime = None
        self._aborttime = None
        self._url = None
        self._objects = []

    def tpc_begin(self, when, args, client):
        self._begintime = when
        # args are txnid, user, description (looks like it's always a url)
        self._url = args[2]

    def storea(self, when, args, client):
        oid = int(args[0])
        # args[1] is "[numbytes]"
        size = int(args[1][1:-1])
        s = StoreStat(when, oid, size)
        self._objects.append(s)

    def tpc_abort(self, when):
        self._aborttime = when

    def tpc_finish(self, when):
        self._finishtime = when

# Mapping oid -> revid
_revids = {}

class ReplayTxn(TxnStat):
    def __init__(self, storage):
        self._storage = storage
        self._replaydelta = 0
        TxnStat.__init__(self)

    def replay(self):
        ZERO = '\0' * 8
        t0 = now()
        t = Transaction()
        self._storage.tpc_begin(t)
        for obj in self._objects:
            oid = obj.oid
            revid = _revids.get(oid, ZERO)
            # BAW: simulate a pickle of the given size
            data = 'x' * obj.size
            # BAW: ignore versions for now
            newrevid = self._storage.store(p64(oid), revid, data, '', t)
            _revids[oid] = newrevid
        if self._aborttime:
            self._storage.tpc_abort(t)
            origdelta = self._aborttime - self._begintime
        else:
            self._storage.tpc_vote(t)
            self._storage.tpc_finish(t)
            origdelta = self._finishtime - self._begintime
        t1 = now()
        # Shows how many seconds behind (positive) or ahead (negative) of the
        # original reply our local update took
        self._replaydelta = t1 - t0 - origdelta

class ZEOParser:
    def __init__(self, maxtxns=-1, report=1, storage=None):
        self.__txns = []
        self.__curtxn = {}
        self.__skipped = 0
        self.__maxtxns = maxtxns
        self.__finishedtxns = 0
        self.__report = report
        self.__storage = storage

    def parse(self, line):
        t, m, c = parse_line(line)
        if t is None:
            # Skip this line
            return
        name = m[0]
        meth = getattr(self, name, None)
        if meth is not None:
            meth(t, m[1], c)

    def tpc_begin(self, when, args, client):
        txn = ReplayTxn(self.__storage)
        self.__curtxn[client] = txn
        meth = getattr(txn, 'tpc_begin', None)
        if meth is not None:
            meth(when, args, client)

    def storea(self, when, args, client):
        txn = self.__curtxn.get(client)
        if txn is None:
            self.__skipped += 1
            return
        meth = getattr(txn, 'storea', None)
        if meth is not None:
            meth(when, args, client)

    def tpc_finish(self, when, args, client):
        txn = self.__curtxn.get(client)
        if txn is None:
            self.__skipped += 1
            return
        meth = getattr(txn, 'tpc_finish', None)
        if meth is not None:
            meth(when)
        if self.__report:
            self.report(txn)
        self.__txns.append(txn)
        self.__curtxn[client] = None
        self.__finishedtxns += 1
        if self.__maxtxns > 0 and self.__finishedtxns >= self.__maxtxns:
            raise StopParsing

    def report(self, txn):
        """Print a report about the transaction"""
        if txn._objects:
            bytes = reduce(operator.add, [size for oid, size in txn._objects])
        else:
            bytes = 0
        print '%s %s %4d %10d %s %s' % (
            txn._begintime, txn._finishtime - txn._begintime,
            len(txn._objects),
            bytes,
            time.ctime(txn._begintime),
            txn._url)

    def replay(self):
        for txn in self.__txns:
            txn.replay()
        # How many fell behind?
        slower = []
        faster = []
        for txn in self.__txns:
            if txn._replaydelta > 0:
                slower.append(txn)
            else:
                faster.append(txn)
        print len(slower), 'laggards,', len(faster), 'on-time or faster'
        # Find some averages
        if slower:
            sum = reduce(operator.add,
                         [txn._replaydelta for txn in slower], 0)
            print 'average slower txn was:', float(sum) / len(slower)
        if faster:
            sum = reduce(operator.add,
                         [txn._replaydelta for txn in faster], 0)
            print 'average faster txn was:', float(sum) / len(faster)

def main():
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'hr:pm:',
            ['help', 'replay=', 'report', 'maxtxns='])
    except getopt.error, e:
        usage(1, e)
    if args:
        usage(1)
    replay = 0
    maxtxns = -1
    report = 0
    storage = None
    storagefile = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-r', '--replay'):
            replay = 1
            storagefile = arg
        elif opt in ('-p', '--report'):
            report = 1
        elif opt in ('-m', '--maxtxns'):
            try:
                maxtxns = int(arg)
            except ValueError:
                usage(1, 'Bad -m argument: %s' % arg)

    if replay:
        storage = FileStorage(storagefile)
        #storage = BDBFullStorage(storagefile)
        #storage = PrimaryStorage('yyz', storage, RS_PORT)
    t0 = now()
    p = ZEOParser(maxtxns, report, storage)
    i = 0
    while 1:
        line = sys.stdin.readline()
        if not line:
            break
        i += 1
        try:
            p.parse(line)
        except StopParsing:
            break
        except:
            print 'input file line:', i
            raise
    t1 = now()
    print 'total parse time:', t1 - t0
    t2 = now()
    if replay:
        p.replay()
    t3 = now()
    print 'total replay time:', t3 - t2
    print 'total time:', t3 - t0

if __name__ == '__main__':
    main()
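The script reads log lines from standard input, so it composes with shell
filters; an illustrative run that reports on the first 100 transactions and
replays them into a scratch FileStorage (file names are examples):

    python zeoreplay.py -p -m 100 -r Replay.fs < zeo.log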
utilities/ZODBTools/zeoserverlog.py
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Tools for analyzing ZEO Server logs.

This script contains a number of commands, implemented by command
functions.  To run a command, give the command name and its arguments
as arguments to this script.

Commands:

  blocked_times file threshold

     Output a summary of episodes where transactions were blocked
     when the episode lasted at least threshold seconds.

     The file may be a file name or - to read from standard input.
     The file may also be a command:

       script blocked_times 'bunzip2 <foo.log.bz2' 60

     If the file is a command, it must contain at least a single
     space.

     The columns of output are:

     - The time the episode started

     - The seconds from the start of the episode until the blocking
       transaction finished.

     - The client id (host and port) of the blocking transaction.

     - The seconds from the start of the episode until the end of the
       episode.

  time_calls file threshold

     Time how long calls took.  Note that this is normally combined
     with grep to time just a particular kind of call:

       script time_calls 'bunzip2 <foo.log.bz2 | grep tpc_finish' 10

     The columns of output are:

     - The time of the call invocation

     - The seconds from the call to the return

     - The client that made the call.

  time_trans file threshold

     Output a summary of transactions that held the global transaction
     lock for at least threshold seconds.  (This is the time from when
     voting starts until the transaction is completed by the server.)

     The columns of output are:

     - time that the vote started.

     - client id

     - number of objects written / number of objects updated

     - seconds from tpc_begin to vote start

     - seconds spent voting

     - vote status: n=normal, d=delayed, e=error

     - seconds waiting between vote return and finish call

     - time spent finishing or 'abort' if the transaction aborted

  minute file

     Compute production statistics by minute

     The columns of output are:

     - date/time

     - Number of active clients

     - number of reads

     - number of stores

     - number of commits (finish)

     - number of aborts

     - number of transactions (commits + aborts)

     Summary statistics are printed at the end

  minutes file

     Show just the summary statistics for production by minute.

  hour file

     Compute production statistics by hour

  hours file

     Show just the summary statistics for production by hour.

  day file

     Compute production statistics by day

  days file

     Show just the summary statistics for production by day.

  verify file

     Compute verification statistics

     The columns of output are:

     - client id
     - verification start time
     - number of objects verified
     - wall time to verify
     - average milliseconds to verify per object.

$Id: zeoserverlog.py,v 1.5 2004/03/18 13:27:49 yuppie Exp $
"""

import datetime, sys, re, os


def time(line):
    d = line[:10]
    t = line[11:19]
    y, mo, d = map(int, d.split('-'))
    h, mi, s = map(int, t.split(':'))
    return datetime.datetime(y, mo, d, h, mi, s)


def sub(t1, t2):
    delta = t2 - t1
    return delta.days * 86400.0 + delta.seconds \
           + delta.microseconds / 1000000.0


waitre = re.compile(r'Clients waiting: (\d+)')
idre = re.compile(r' ZSS:\d+/(\d+.\d+.\d+.\d+:\d+) ')
def blocked_times(args):
    f, thresh = args

    t1 = t2 = cid = blocking = waiting = 0
    last_blocking = False

    thresh = int(thresh)

    for line in xopen(f):
        line = line.strip()

        if line.endswith('Blocked transaction restarted.'):
            blocking = False
            waiting = 0
        else:
            s = waitre.search(line)
            if not s:
                continue
            waiting = int(s.group(1))
            blocking = line.find(
                'Transaction blocked waiting for storage') >= 0

        if blocking and waiting == 1:
            t1 = time(line)
            t2 = t1

        if not blocking and last_blocking:
            last_wait = 0
            t2 = time(line)
            cid = idre.search(line).group(1)

        if waiting == 0:
            d = sub(t1, time(line))
            if d >= thresh:
                print t1, sub(t1, t2), cid, d
            t1 = t2 = cid = blocking = waiting = last_wait = max_wait = 0

        last_blocking = blocking

connidre = re.compile(r' zrpc-conn:(\d+.\d+.\d+.\d+:\d+) ')
def time_calls(f):
    f, thresh = f
    if f == '-':
        f = sys.stdin
    else:
        f = xopen(f)
    thresh = float(thresh)
    t1 = None
    maxd = 0

    for line in f:
        line = line.strip()

        if 'calling' in line:
            t1 = time(line)
        elif 'returns' in line and t1 is not None:
            d = sub(t1, time(line))
            if d >= thresh:
                print t1, d, connidre.search(line).group(1)
            maxd = max(maxd, d)
            t1 = None

    print maxd

def xopen(f):
    if f == '-':
        return sys.stdin
    if ' ' in f:
        return os.popen(f, 'r')
    return open(f)

def time_tpc(f):
    f, thresh = f
    if f == '-':
        f = sys.stdin
    else:
        f = xopen(f)
    thresh = float(thresh)
    transactions = {}

    for line in f:
        line = line.strip()

        if 'calling vote(' in line:
            cid = connidre.search(line).group(1)
            transactions[cid] = time(line),
        elif 'vote returns None' in line:
            cid = connidre.search(line).group(1)
            transactions[cid] += time(line), 'n'
        elif 'vote() raised' in line:
            cid = connidre.search(line).group(1)
            transactions[cid] += time(line), 'e'
        elif 'vote returns' in line:
            # delayed, skip
            cid = connidre.search(line).group(1)
            transactions[cid] += time(line), 'd'
        elif 'calling tpc_abort(' in line:
            cid = connidre.search(line).group(1)
            if cid in transactions:
                t1, t2, vs = transactions[cid]
                t = time(line)
                d = sub(t1, t)
                if d >= thresh:
                    print 'a', t1, cid, sub(t1, t2), vs, sub(t2, t)
                del transactions[cid]
        elif 'calling tpc_finish(' in line:
            if cid in transactions:
                cid = connidre.search(line).group(1)
                transactions[cid] += time(line),
        elif 'tpc_finish returns' in line:
            if cid in transactions:
                t1, t2, vs, t3 = transactions[cid]
                t = time(line)
                d = sub(t1, t)
                if d >= thresh:
                    print 'c', t1, cid, sub(t1, t2), vs, sub(t2, t3), \
                          sub(t3, t)
                del transactions[cid]

newobre = re.compile(r"storea\(.*, '\\x00\\x00\\x00\\x00\\x00")
def time_trans(f):
    f, thresh = f
    if f == '-':
        f = sys.stdin
    else:
        f = xopen(f)
    thresh = float(thresh)
    transactions = {}

    for line in f:
        line = line.strip()

        if 'calling tpc_begin(' in line:
            cid = connidre.search(line).group(1)
            transactions[cid] = time(line), [0, 0]
        if 'calling storea(' in line:
            cid = connidre.search(line).group(1)
            if cid in transactions:
                transactions[cid][1][0] += 1
                if not newobre.search(line):
                    transactions[cid][1][1] += 1
        elif 'calling vote(' in line:
            cid = connidre.search(line).group(1)
            if cid in transactions:
                transactions[cid] += time(line),
        elif 'vote returns None' in line:
            cid = connidre.search(line).group(1)
            if cid in transactions:
                transactions[cid] += time(line), 'n'
        elif 'vote() raised' in line:
            cid = connidre.search(line).group(1)
            if cid in transactions:
                transactions[cid] += time(line), 'e'
        elif 'vote returns' in line:
            # delayed, skip
            cid = connidre.search(line).group(1)
            if cid in transactions:
                transactions[cid] += time(line), 'd'
        elif 'calling tpc_abort(' in line:
            cid = connidre.search(line).group(1)
            if cid in transactions:
                try:
                    t0, (stores, old), t1, t2, vs = transactions[cid]
                except ValueError:
                    pass
                else:
                    t = time(line)
                    d = sub(t1, t)
                    if d >= thresh:
                        print t1, cid, "%s/%s" % (stores, old), \
                              sub(t0, t1), sub(t1, t2), vs, \
                              sub(t2, t), 'abort'
                del transactions[cid]
        elif 'calling tpc_finish(' in line:
            if cid in transactions:
                cid = connidre.search(line).group(1)
                transactions[cid] += time(line),
        elif 'tpc_finish returns' in line:
            if cid in transactions:
                t0, (stores, old), t1, t2, vs, t3 = transactions[cid]
                t = time(line)
                d = sub(t1, t)
                if d >= thresh:
                    print t1, cid, "%s/%s" % (stores, old), \
                          sub(t0, t1), sub(t1, t2), vs, \
                          sub(t2, t3), sub(t3, t)
                del transactions[cid]

def minute(f, slice=16, detail=1, summary=1):
    f, = f
    if f == '-':
        f = sys.stdin
    else:
        f = xopen(f)

    cols = ["time", "reads", "stores", "commits", "aborts", "txns"]
    fmt = "%18s %6s %6s %7s %6s %6s"
    print fmt % tuple(cols)
    print fmt % tuple(["-" * len(col) for col in cols])

    mlast = r = s = c = a = cl = None
    rs = []
    ss = []
    cs = []
    aborts = []
    ts = []
    cls = []

    for line in f:
        line = line.strip()
        if (line.find('returns') > 0
            or line.find('storea') > 0
            or line.find('tpc_abort') > 0
            ):
            client = connidre.search(line).group(1)
            m = line[:slice]
            if m != mlast:
                if mlast:
                    if detail:
                        print fmt % (mlast, len(cl), r, s, c, a, a + c)
                    cls.append(len(cl))
                    rs.append(r)
                    ss.append(s)
                    cs.append(c)
                    aborts.append(a)
                    ts.append(c + a)
                mlast = m
                r = s = c = a = 0
                cl = {}
            if line.find('zeoLoad') > 0:
                r += 1
                cl[client] = 1
            elif line.find('storea') > 0:
                s += 1
                cl[client] = 1
            elif line.find('tpc_finish') > 0:
                c += 1
                cl[client] = 1
            elif line.find('tpc_abort') > 0:
                a += 1
                cl[client] = 1

    if mlast:
        if detail:
            print fmt % (mlast, len(cl), r, s, c, a, a + c)
        cls.append(len(cl))
        rs.append(r)
        ss.append(s)
        cs.append(c)
        aborts.append(a)
        ts.append(c + a)

    if summary:
        print
        print 'Summary:\t', '\t'.join(('min', '10%', '25%', 'med',
                                       '75%', '90%', 'max', 'mean'))
        print "n=%6d\t" % len(cls), '-' * 62
        print 'Clients:\t', '\t'.join(map(str, stats(cls)))
        print 'Reads:  \t', '\t'.join(map(str, stats(rs)))
        print 'Stores: \t', '\t'.join(map(str, stats(ss)))
        print 'Commits:\t', '\t'.join(map(str, stats(cs)))
        print 'Aborts: \t', '\t'.join(map(str, stats(aborts)))
        print 'Trans:  \t', '\t'.join(map(str, stats(ts)))

def stats(s):
    s.sort()
    min = s[0]
    max = s[-1]
    n = len(s)
    out = [min]
    ni = n + 1
    for p in .1, .25, .5, .75, .90:
        lp = ni * p
        l = int(lp)
        if lp < 1 or lp > n:
            out.append('-')
        elif abs(lp - l) < .00001:
            out.append(s[l - 1])
        else:
            out.append(int(s[l - 1] + (lp - l) * (s[l] - s[l - 1])))
    mean = 0.0
    for v in s:
        mean += v
    out.extend([max, int(mean / n)])
    return out

def minutes(f):
    minute(f, 16, detail=0)

def hour(f):
    minute(f, 13)

def day(f):
    minute(f, 10)

def hours(f):
    minute(f, 13, detail=0)

def days(f):
    minute(f, 10, detail=0)


new_connection_idre = re.compile(
    r"new connection \('(\d+.\d+.\d+.\d+)', (\d+)\):")
def verify(f):
    f, = f
    if f == '-':
        f = sys.stdin
    else:
        f = xopen(f)
    t1 = None
    nv = {}
    for line in f:
        if line.find('new connection') > 0:
            m = new_connection_idre.search(line)
            cid = "%s:%s" % (m.group(1), m.group(2))
            nv[cid] = [time(line), 0]
        elif line.find('calling zeoVerify(') > 0:
            cid = connidre.search(line).group(1)
            nv[cid][1] += 1
        elif line.find('calling endZeoVerify()') > 0:
            cid = connidre.search(line).group(1)
            t1, n = nv[cid]
            if n:
                d = sub(t1, time(line))
                print cid, t1, n, d, n and (d * 1000.0 / n) or '-'

def recovery(f):
    f, = f
    if f == '-':
        f = sys.stdin
    else:
        f = xopen(f)
    last = ''
    trans = []
    n = 0
    for line in f:
        n += 1
        if line.find('RecoveryServer') < 0:
            continue
        l = line.find('sending transaction ')
        if l > 0 and last.find('sending transaction ') > 0:
            trans.append(line[l + 20:].strip())
        else:
            if trans:
                if len(trans) > 1:
                    print "... %s similar records skipped ..." % (
                        len(trans) - 1)
                print n, last.strip()
                trans = []
            print n, line.strip()
        last = line
    if len(trans) > 1:
        print "... %s similar records skipped ..." % (
            len(trans) - 1)
    print n, last.strip()


if __name__ == '__main__':
    globals()[sys.argv[1]](sys.argv[2:])
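Since xopen() treats any argument containing a space as a command to pipe
from, compressed logs can be analyzed in place; an illustrative invocation
(the log name is an example):

    python zeoserverlog.py minutes 'bunzip2 <zeo.log.bz2'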
utilities/ZODBTools/zeoup.py
0 → 100755
View file @
643a9757
#!/usr/bin/env python2.3
"""Make sure a ZEO server is running.

usage: zeoup.py [options]

The test will connect to a ZEO server, load the root object, and attempt to
update the zeoup counter in the root.  It will report success if it updates
the counter or if it gets a ConflictError.  A ConflictError is considered a
success, because the client was able to start a transaction.

Options:

    -p port -- port to connect to

    -h host -- host to connect to (default is current host)

    -S storage -- storage name (default '1')

    -U path -- Unix-domain socket to connect to

    --nowrite -- Do not update the zeoup counter.

    -1 -- Connect to a ZEO 1.0 server.

You must specify either -p and -h or -U.
"""

import getopt
import socket
import sys
import time

from persistent.mapping import PersistentMapping
import transaction

import ZODB
from ZODB.POSException import ConflictError
from ZODB.tests.MinPO import MinPO
from ZEO.ClientStorage import ClientStorage

ZEO_VERSION = 2

def check_server(addr, storage, write):
    t0 = time.time()
    if ZEO_VERSION == 2:
        # XXX should do retries w/ exponential backoff
        cs = ClientStorage(addr, storage=storage, wait=0,
                           read_only=(not write))
    else:
        cs = ClientStorage(addr, storage=storage, debug=1,
                           wait_for_server_on_startup=1)
        # _startup() is an artifact of the way ZEO 1.0 works.  The
        # ClientStorage doesn't get fully initialized until registerDB()
        # is called.  The only thing we care about, though, is that
        # registerDB() calls _startup().
    if write:
        db = ZODB.DB(cs)
        cn = db.open()
        root = cn.root()
        try:
            # We store the data in a special `monitor' dict under the root,
            # where other tools may also store such heartbeat and bookkeeping
            # type data.
            monitor = root.get('monitor')
            if monitor is None:
                monitor = root['monitor'] = PersistentMapping()
            obj = monitor['zeoup'] = monitor.get('zeoup', MinPO(0))
            obj.value += 1
            transaction.commit()
        except ConflictError:
            pass
        cn.close()
        db.close()
    else:
        data, serial = cs.load("\0\0\0\0\0\0\0\0", "")
        cs.close()
    t1 = time.time()
    print "Elapsed time: %.2f" % (t1 - t0)

def usage(exit=1):
    print __doc__
    print " ".join(sys.argv)
    sys.exit(exit)

def main():
    global ZEO_VERSION
    host = None
    port = None
    unix = None
    write = 1
    storage = '1'
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'p:h:U:S:1',
                                   ['nowrite'])
        for o, a in opts:
            if o == '-p':
                port = int(a)
            elif o == '-h':
                host = a
            elif o == '-U':
                unix = a
            elif o == '-S':
                storage = a
            elif o == '--nowrite':
                write = 0
            elif o == '-1':
                ZEO_VERSION = 1
    except Exception, err:
        s = str(err)
        if s:
            s = ": " + s
        print err.__class__.__name__ + s
        usage()

    if unix is not None:
        addr = unix
    else:
        if host is None:
            host = socket.gethostname()
        if port is None:
            usage()
        addr = host, port

    check_server(addr, storage, write)

if __name__ == "__main__":
    try:
        main()
    except SystemExit:
        raise
    except Exception, err:
        s = str(err)
        if s:
            s = ": " + s
        print err.__class__.__name__ + s
        sys.exit(1)
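An illustrative read-only liveness probe, which loads the root object
without touching the zeoup counter (host and port are examples):

    zeoup.py -h zeohost -p 9999 --nowrite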
utilities/ZODBTools/zodbload.py
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Test script for testing ZODB under a heavy zope-like load.
Note that, to be as realistic as possible with ZEO, you should run this
script multiple times, to simulate multiple clients.
Here's how this works.
The script starts some number of threads. Each thread, sequentially
executes jobs. There is a job producer that produces jobs.
Input data are provided by a mail producer that hands out message from
a mailbox.
Execution continues until there is an error, which will normally occur
when the mailbox is exhausted.
Command-line options are used to provide job definitions. Job
definitions have perameters of the form name=value. Jobs have 2
standard parameters:
frequency=integer
The frequency of the job. The default is 1.
sleep=float
The number os seconds to sleep before performing the job. The
default is 0.
Usage: loadmail2 [options]
Options:
-edit [frequency=integer] [sleep=float]
Define an edit job. An edit job edits a random already-saved
email message, deleting and inserting a random number of words.
After editing the message, the message is (re)cataloged.
-insert [number=int] [frequency=integer] [sleep=float]
Insert some number of email messages.
-index [number=int] [frequency=integer] [sleep=float]
Insert and index (catalog) some number of email messages.
-search [terms='word1 word2 ...'] [frequency=integer] [sleep=float]
Search the catalog. A query is givem with one or more terms as
would be entered into a typical seach box. If no query is
given, then queries will be randomly selected based on a set of
built-in word list.
-setup
Set up the database. This will delete any existing Data.fs
file. (Of course, this may have no effect, if there is a
custom_zodb that defined a different storage.) It also adds a
mail folder and a catalog.
-options file
Read options from the given file. Th efile should be a python
source file that defines a sequence of options named 'options'.
-threads n
Specify the number of threads to execute. If not specified (< 2),
then jobs are run in a single (main) thread.
-mbox filename
Specify the mailbox for getting input data.
$Id: zodbload.py,v 1.7 2004/04/16 16:00:56 jeremy Exp $
"""
import
mailbox
import
math
import
os
import
random
import
re
import
sys
import
threading
import
time
import
transaction
class
JobProducer
:
def
__init__
(
self
):
self
.
jobs
=
[]
def
add
(
self
,
callable
,
frequency
,
sleep
,
repeatp
=
0
):
self
.
jobs
.
extend
([(
callable
,
sleep
,
repeatp
)]
*
int
(
frequency
))
random
.
shuffle
(
self
.
jobs
)
def
next
(
self
):
factory
,
sleep
,
repeatp
=
random
.
choice
(
self
.
jobs
)
time
.
sleep
(
sleep
)
callable
,
args
=
factory
.
create
()
return
factory
,
callable
,
args
,
repeatp
def
__nonzero__
(
self
):
return
not
not
self
.
jobs
class
MBox
:
def
__init__
(
self
,
filename
):
if
' '
in
filename
:
filename
,
min
,
max
=
filename
.
split
()
min
=
int
(
min
)
max
=
int
(
max
)
else
:
min
=
max
=
0
if
filename
.
endswith
(
'.bz2'
):
f
=
os
.
popen
(
"bunzip2 <"
+
filename
,
'r'
)
filename
=
filename
[
-
4
:]
else
:
f
=
open
(
filename
)
self
.
_mbox
=
mb
=
mailbox
.
UnixMailbox
(
f
)
self
.
number
=
min
while
min
:
mb
.
next
()
min
-=
1
self
.
_lock
=
threading
.
Lock
()
self
.
__name__
=
os
.
path
.
splitext
(
os
.
path
.
split
(
filename
)[
1
])[
0
]
self
.
_max
=
max
def
next
(
self
):
self
.
_lock
.
acquire
()
try
:
if
self
.
_max
>
0
and
self
.
number
>=
self
.
_max
:
raise
IndexError
(
self
.
number
+
1
)
message
=
self
.
_mbox
.
next
()
message
.
body
=
message
.
fp
.
read
()
message
.
headers
=
list
(
message
.
headers
)
self
.
number
+=
1
message
.
number
=
self
.
number
message
.
mbox
=
self
.
__name__
return
message
finally
:
self
.
_lock
.
release
()
bins
=
9973
#bins = 11
def
mailfolder
(
app
,
mboxname
,
number
):
mail
=
getattr
(
app
,
mboxname
,
None
)
if
mail
is
None
:
app
.
manage_addFolder
(
mboxname
)
mail
=
getattr
(
app
,
mboxname
)
from
BTrees.Length
import
Length
mail
.
length
=
Length
()
for
i
in
range
(
bins
):
mail
.
manage_addFolder
(
'b'
+
str
(
i
))
bin
=
hash
(
str
(
number
))
%
bins
return
getattr
(
mail
,
'b'
+
str
(
bin
))
def
VmSize
():
try
:
f
=
open
(
'/proc/%s/status'
%
os
.
getpid
())
except
:
return
0
else
:
l
=
filter
(
lambda
l
:
l
[:
7
]
==
'VmSize:'
,
f
.
readlines
())
if
l
:
l
=
l
[
0
][
7
:].
strip
().
split
()[
0
]
return
int
(
l
)
return
0
def
setup
(
lib_python
):
try
:
os
.
remove
(
os
.
path
.
join
(
lib_python
,
'..'
,
'..'
,
'var'
,
'Data.fs'
))
except
:
pass
import
Zope
import
Products
import
AccessControl.SecurityManagement
app
=
Zope
.
app
()
Products
.
ZCatalog
.
ZCatalog
.
manage_addZCatalog
(
app
,
'cat'
,
''
)
from
Products.ZCTextIndex.ZCTextIndex
import
PLexicon
from
Products.ZCTextIndex.Lexicon
import
Splitter
,
CaseNormalizer
app
.
cat
.
_setObject
(
'lex'
,
PLexicon
(
'lex'
,
''
,
Splitter
(),
CaseNormalizer
())
)
class
extra
:
doc_attr
=
'PrincipiaSearchSource'
lexicon_id
=
'lex'
index_type
=
'Okapi BM25 Rank'
app
.
cat
.
addIndex
(
'PrincipiaSearchSource'
,
'ZCTextIndex'
,
extra
)
transaction
.
commit
()
system
=
AccessControl
.
SpecialUsers
.
system
AccessControl
.
SecurityManagement
.
newSecurityManager
(
None
,
system
)
app
.
_p_jar
.
close
()
def
do
(
db
,
f
,
args
):
"""Do something in a transaction, retrying of necessary
Measure the speed of both the compurartion and the commit
"""
from
ZODB.POSException
import
ConflictError
wcomp
=
ccomp
=
wcommit
=
ccommit
=
0.0
rconflicts
=
wconflicts
=
0
start
=
time
.
time
()
while
1
:
connection
=
db
.
open
()
try
:
transaction
.
begin
()
t
=
time
.
time
()
c
=
time
.
clock
()
try
:
try
:
r
=
f
(
connection
,
*
args
)
except
ConflictError
:
rconflicts
+=
1
transaction
.
abort
()
continue
finally
:
wcomp
+=
time
.
time
()
-
t
ccomp
+=
time
.
clock
()
-
c
t
=
time
.
time
()
c
=
time
.
clock
()
try
:
try
:
transaction
.
commit
()
break
except
ConflictError
:
wconflicts
+=
1
transaction
.
abort
()
continue
finally
:
wcommit
+=
time
.
time
()
-
t
ccommit
+=
time
.
clock
()
-
c
finally
:
connection
.
close
()
return
start
,
wcomp
,
ccomp
,
rconflicts
,
wconflicts
,
wcommit
,
ccommit
,
r
def
run1
(
tid
,
db
,
factory
,
job
,
args
):
(
start
,
wcomp
,
ccomp
,
rconflicts
,
wconflicts
,
wcommit
,
ccommit
,
r
)
=
do
(
db
,
job
,
args
)
start
=
"%.4d-%.2d-%.2d %.2d:%.2d:%.2d"
%
time
.
localtime
(
start
)[:
6
]
print
"%s %s %8.3g %8.3g %s %s
\
t
%8.3g %8.3g %s %r"
%
(
start
,
tid
,
wcomp
,
ccomp
,
rconflicts
,
wconflicts
,
wcommit
,
ccommit
,
factory
.
__name__
,
r
)
def
run
(
jobs
,
tid
=
''
):
import
Zope
while
1
:
factory
,
job
,
args
,
repeatp
=
jobs
.
next
()
run1
(
tid
,
Zope
.
DB
,
factory
,
job
,
args
)
if
repeatp
:
while
1
:
i
=
random
.
randint
(
0
,
100
)
if
i
>
repeatp
:
break
run1
(
tid
,
Zope
.
DB
,
factory
,
job
,
args
)
def
index
(
connection
,
messages
,
catalog
):
app
=
connection
.
root
()[
'Application'
]
for
message
in
messages
:
mail
=
mailfolder
(
app
,
message
.
mbox
,
message
.
number
)
docid
=
'm'
+
str
(
message
.
number
)
mail
.
manage_addDTMLDocument
(
docid
,
file
=
message
.
body
)
# increment counted
getattr
(
app
,
message
.
mbox
).
length
.
change
(
1
)
doc
=
mail
[
docid
]
for
h
in
message
.
headers
:
h
=
h
.
strip
()
l
=
h
.
find
(
':'
)
if
l
<=
0
:
continue
name
=
h
[:
l
].
lower
()
if
name
==
'subject'
:
name
=
'title'
v
=
h
[
l
+
1
:].
strip
()
type
=
'string'
if
name
==
'title'
:
doc
.
manage_changeProperties
(
title
=
h
)
else
:
try
:
doc
.
manage_addProperty
(
name
,
v
,
type
)
except
:
pass
if
catalog
:
app
.
cat
.
catalog_object
(
doc
)
return
message
.
number
class
IndexJob
:
needs_mbox
=
1
catalog
=
1
prefix
=
'index'
def
__init__
(
self
,
mbox
,
number
=
1
):
self
.
__name__
=
"%s%s_%s"
%
(
self
.
prefix
,
number
,
mbox
.
__name__
)
self
.
mbox
,
self
.
number
=
mbox
,
int
(
number
)
def
create
(
self
):
messages
=
[
self
.
mbox
.
next
()
for
i
in
range
(
self
.
number
)]
return
index
,
(
messages
,
self
.
catalog
)
class
InsertJob
(
IndexJob
):
catalog
=
0
prefix
=
'insert'
wordre
=
re
.
compile
(
r'(\
w{
3,20})'
)
stop
=
'and'
,
'not'
def
edit
(
connection
,
mbox
,
catalog
=
1
):
app
=
connection
.
root
()[
'Application'
]
mail
=
getattr
(
app
,
mbox
.
__name__
,
None
)
if
mail
is
None
:
time
.
sleep
(
1
)
return
"No mailbox %s"
%
mbox
.
__name__
nmessages
=
mail
.
length
()
if
nmessages
<
2
:
time
.
sleep
(
1
)
return
"No messages to edit in %s"
%
mbox
.
__name__
# find a message to edit:
while
1
:
number
=
random
.
randint
(
1
,
nmessages
-
1
)
did
=
'm'
+
str
(
number
)
mail
=
mailfolder
(
app
,
mbox
.
__name__
,
number
)
doc
=
getattr
(
mail
,
did
,
None
)
if
doc
is
not
None
:
break
text
=
doc
.
raw
.
split
()
norig
=
len
(
text
)
if
norig
>
10
:
ndel
=
int
(
math
.
exp
(
random
.
randint
(
0
,
int
(
math
.
log
(
norig
)))))
nins
=
int
(
math
.
exp
(
random
.
randint
(
0
,
int
(
math
.
log
(
norig
)))))
else
:
ndel
=
0
nins
=
10
for
j
in
range
(
ndel
):
j
=
random
.
randint
(
0
,
len
(
text
)
-
1
)
word
=
text
[
j
]
m
=
wordre
.
search
(
word
)
if
m
:
word
=
m
.
group
(
1
).
lower
()
if
(
not
wordsd
.
has_key
(
word
))
and
word
not
in
stop
:
words
.
append
(
word
)
wordsd
[
word
]
=
1
del
text
[
j
]
for
j
in
range
(
nins
):
word
=
random
.
choice
(
words
)
text
.
append
(
word
)
doc
.
raw
=
' '
.
join
(
text
)
if
catalog
:
app
.
cat
.
catalog_object
(
doc
)
return
norig
,
ndel
,
nins
class
EditJob
:
needs_mbox
=
1
prefix
=
'edit'
catalog
=
1
def
__init__
(
self
,
mbox
):
self
.
__name__
=
"%s_%s"
%
(
self
.
prefix
,
mbox
.
__name__
)
self
.
mbox
=
mbox
def
create
(
self
):
return
edit
,
(
self
.
mbox
,
self
.
catalog
)
class
ModifyJob
(
EditJob
):
prefix
=
'modify'
catalog
=
0
def search(connection, terms, number):
    app = connection.root()['Application']
    cat = app.cat
    n = 0

    for i in number:
        term = random.choice(terms)

        results = cat(PrincipiaSearchSource=term)
        n += len(results)
        for result in results:
            obj = result.getObject()
            # Apparently, there is a bug in Zope that leads obj to be None
            # on occasion.
            if obj is not None:
                obj.getId()

    return n
class SearchJob:

    def __init__(self, terms='', number=10):

        if terms:
            terms = terms.split()
            self.__name__ = "search_" + '_'.join(terms)
            self.terms = terms
        else:
            self.__name__ = 'search'
            self.terms = words

        number = min(int(number), len(self.terms))
        self.number = range(number)

    def create(self):
        return search, (self.terms, self.number)
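# Note that SearchJob stores range(number), not the count itself;
# search() uses it only as "for i in number" to repeat the query.
# For example, SearchJob(number=3) issues three random-term queries
# per run against the built-in word list below.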
words = [
    'banishment', 'indirectly', 'imprecise', 'peeks', 'opportunely', 'bribe',
    'sufficiently', 'Occidentalized', 'elapsing', 'fermenting', 'listen',
    'orphanage', 'younger', 'draperies', 'Ida', 'cuttlefish', 'mastermind',
    'Michaels', 'populations', 'lent', 'cater', 'attentional', 'hastiness',
    'dragnet', 'mangling', 'scabbards', 'princely', 'star', 'repeat',
    'deviation', 'agers', 'fix', 'digital', 'ambitious', 'transit', 'jeeps',
    'lighted', 'Prussianizations', 'Kickapoo', 'virtual', 'Andrew',
    'generally', 'boatsman', 'amounts', 'promulgation', 'Malay', 'savaging',
    'courtesan', 'nursed', 'hungered', 'shiningly', 'ship', 'presides',
    'Parke', 'moderns', 'Jonas', 'unenlightening', 'dearth', 'deer',
    'domesticates', 'recognize', 'gong', 'penetrating', 'dependents',
    'unusually', 'complications', 'Dennis', 'imbalances', 'nightgown',
    'attached', 'testaments', 'congresswoman', 'circuits', 'bumpers',
    'braver', 'Boreas', 'hauled', 'Howe', 'seethed', 'cult', 'numismatic',
    'vitality', 'differences', 'collapsed', 'Sandburg', 'inches', 'head',
    'rhythmic', 'opponent', 'blanketer', 'attorneys', 'hen', 'spies',
    'indispensably', 'clinical', 'redirection', 'submit', 'catalysts',
    'councilwoman', 'kills', 'topologies', 'noxious', 'exactions', 'dashers',
    'balanced', 'slider', 'cancerous', 'bathtubs', 'legged', 'respectably',
    'crochets', 'absenteeism', 'arcsine', 'facility', 'cleaners', 'bobwhite',
    'Hawkins', 'stockade', 'provisional', 'tenants', 'forearms', 'Knowlton',
    'commit', 'scornful', 'pediatrician', 'greets', 'clenches', 'trowels',
    'accepts', 'Carboloy', 'Glenn', 'Leigh', 'enroll', 'Madison', 'Macon',
    'oiling', 'entertainingly', 'super', 'propositional', 'pliers',
    'beneficiary', 'hospitable', 'emigration', 'sift', 'sensor', 'reserved',
    'colonization', 'shrilled', 'momentously', 'stevedore', 'Shanghaiing',
    'schoolmasters', 'shaken', 'biology', 'inclination', 'immoderate',
    'stem', 'allegory', 'economical', 'daytime', 'Newell', 'Moscow',
    'archeology', 'ported', 'scandals', 'Blackfoot', 'leery', 'kilobit',
    'empire', 'obliviousness', 'productions', 'sacrificed', 'ideals',
    'enrolling', 'certainties', 'Capsicum', 'Brookdale', 'Markism',
    'unkind', 'dyers', 'legislates', 'grotesquely', 'megawords',
    'arbitrary', 'laughing', 'wildcats', 'thrower', 'sex', 'devils',
    'Wehr', 'ablates', 'consume', 'gossips', 'doorways', 'Shari',
    'advanced', 'enumerable', 'existentially', 'stunt', 'auctioneers',
    'scheduler', 'blanching', 'petulance', 'perceptibly', 'vapors',
    'progressed', 'rains', 'intercom', 'emergency', 'increased',
    'fluctuating', 'Krishna', 'silken', 'reformed', 'transformation',
    'easter', 'fares', 'comprehensible', 'trespasses', 'hallmark',
    'tormenter', 'breastworks', 'brassiere', 'bladders', 'civet', 'death',
    'transformer', 'tolerably', 'bugle', 'clergy', 'mantels', 'satin',
    'Boswellizes', 'Bloomington', 'notifier', 'Filippo', 'circling',
    'unassigned', 'dumbness', 'sentries', 'representativeness', 'souped',
    'Klux', 'Kingstown', 'gerund', 'Russell', 'splices', 'bellow',
    'bandies', 'beefers', 'cameramen', 'appalled', 'Ionian', 'butterball',
    'Portland', 'pleaded', 'admiringly', 'pricks', 'hearty', 'corer',
    'deliverable', 'accountably', 'mentors', 'accorded', 'acknowledgement',
    'Lawrenceville', 'morphology', 'eucalyptus', 'Rena', 'enchanting',
    'tighter', 'scholars', 'graduations', 'edges', 'Latinization',
    'proficiency', 'monolithic', 'parenthesizing', 'defy', 'shames',
    'enjoyment', 'Purdue', 'disagrees', 'barefoot', 'maims', 'flabbergast',
    'dishonorable', 'interpolation', 'fanatics', 'dickens', 'abysses',
    'adverse', 'components', 'bowl', 'belong', 'Pipestone', 'trainees',
    'paw', 'pigtail', 'feed', 'whore', 'conditioner', 'Volstead', 'voices',
    'strain', 'inhabits', 'Edwin', 'discourses', 'deigns', 'cruiser',
    'biconvex', 'biking', 'depreciation', 'Harrison', 'Persian',
    'stunning', 'agar', 'rope', 'wagoner', 'elections', 'reticulately',
    'Cruz', 'pulpits', 'wilt', 'peels', 'plants', 'administerings',
    'deepen', 'rubs', 'hence', 'dissension', 'implored', 'bereavement',
    'abyss', 'Pennsylvania', 'benevolent', 'corresponding', 'Poseidon',
    'inactive', 'butchers', 'Mach', 'woke', 'loading', 'utilizing',
    'Hoosier', 'undo', 'Semitization', 'trigger', 'Mouthe', 'mark',
    'disgracefully', 'copier', 'futility', 'gondola', 'algebraic',
    'lecturers', 'sponged', 'instigators', 'looted', 'ether', 'trust',
    'feeblest', 'sequencer', 'disjointness', 'congresses', 'Vicksburg',
    'incompatibilities', 'commend', 'Luxembourg', 'reticulation',
    'instructively', 'reconstructs', 'bricks', 'attache', 'Englishman',
    'provocation', 'roughen', 'cynic', 'plugged', 'scrawls', 'antipode',
    'injected', 'Daedalus', 'Burnsides', 'asker', 'confronter',
    'merriment', 'disdain', 'thicket', 'stinker', 'great', 'tiers',
    'oust', 'antipodes', 'Macintosh', 'tented', 'packages',
    'Mediterraneanize', 'hurts', 'orthodontist', 'seeder', 'readying',
    'babying', 'Florida', 'Sri', 'buckets', 'complementary',
    'cartographer', 'chateaus', 'shaves', 'thinkable', 'Tehran',
    'Gordian', 'Angles', 'arguable', 'bureau', 'smallest', 'fans',
    'navigated', 'dipole', 'bootleg', 'distinctive', 'minimization',
    'absorbed', 'surmised', 'Malawi', 'absorbent', 'close', 'conciseness',
    'hopefully', 'declares', 'descent', 'trick', 'portend', 'unable',
    'mildly', 'Morse', 'reference', 'scours', 'Caribbean', 'battlers',
    'astringency', 'likelier', 'Byronizes', 'econometric', 'grad',
    'steak', 'Austrian', 'ban', 'voting', 'Darlington', 'bison', 'Cetus',
    'proclaim', 'Gilbertson', 'evictions', 'submittal', 'bearings',
    'Gothicizer', 'settings', 'McMahon', 'densities', 'determinants',
    'period', 'DeKastere', 'swindle', 'promptness', 'enablers', 'wordy',
    'during', 'tables', 'responder', 'baffle', 'phosgene', 'muttering',
    'limiters', 'custodian', 'prevented', 'Stouffer', 'waltz', 'Videotex',
    'brainstorms', 'alcoholism', 'jab', 'shouldering', 'screening',
    'explicitly', 'earner', 'commandment', 'French', 'scrutinizing',
    'Gemma', 'capacitive', 'sheriff', 'herbivore', 'Betsey', 'Formosa',
    'scorcher', 'font', 'damming', 'soldiers', 'flack', 'Marks',
    'unlinking', 'serenely', 'rotating', 'converge', 'celebrities',
    'unassailable', 'bawling', 'wording', 'silencing', 'scotch',
    'coincided', 'masochists', 'graphs', 'pernicious', 'disease',
    'depreciates', 'later', 'torus', 'interject', 'mutated', 'causer',
    'messy', 'Bechtel', 'redundantly', 'profoundest', 'autopsy',
    'philosophic', 'iterate', 'Poisson', 'horridly', 'silversmith',
    'millennium', 'plunder', 'salmon', 'missioner', 'advances', 'provers',
    'earthliness', 'manor', 'resurrectors', 'Dahl', 'canto', 'gangrene',
    'gabler', 'ashore', 'frictionless', 'expansionism', 'emphasis',
    'preservations', 'Duane', 'descend', 'isolated', 'firmware',
    'dynamites', 'scrawled', 'cavemen', 'ponder', 'prosperity', 'squaw',
    'vulnerable', 'opthalmic', 'Simms', 'unite', 'totallers', 'Waring',
    'enforced', 'bridge', 'collecting', 'sublime', 'Moore', 'gobble',
    'criticizes', 'daydreams', 'sedate', 'apples', 'Concordia',
    'subsequence', 'distill', 'Allan', 'seizure', 'Isadore', 'Lancashire',
    'spacings', 'corresponded', 'hobble', 'Boonton', 'genuineness',
    'artifact', 'gratuities', 'interviewee', 'Vladimir', 'mailable',
    'Bini', 'Kowalewski', 'interprets', 'bereave', 'evacuated', 'friend',
    'tourists', 'crunched', 'soothsayer', 'fleetly', 'Romanizations',
    'Medicaid', 'persevering', 'flimsy', 'doomsday', 'trillion',
    'carcasses', 'guess', 'seersucker', 'ripping', 'affliction',
    'wildest', 'spokes', 'sheaths', 'procreate', 'rusticates',
    'Schapiro', 'thereafter', 'mistakenly', 'shelf', 'ruination',
    'bushel', 'assuredly', 'corrupting', 'federation', 'portmanteau',
    'wading', 'incendiary', 'thing', 'wanderers', 'messages', 'Paso',
    'reexamined', 'freeings', 'denture', 'potting', 'disturber',
    'laborer', 'comrade', 'intercommunicating', 'Pelham', 'reproach',
    'Fenton', 'Alva', 'oasis', 'attending', 'cockpit', 'scout', 'Jude',
    'gagging', 'jailed', 'crustaceans', 'dirt', 'exquisitely',
    'Internet', 'blocker', 'smock', 'Troutman', 'neighboring',
    'surprise', 'midscale', 'impart', 'badgering', 'fountain', 'Essen',
    'societies', 'redresses', 'afterwards', 'puckering', 'silks',
    'Blakey', 'sequel', 'greet', 'basements', 'Aubrey', 'helmsman',
    'album', 'wheelers', 'easternmost', 'flock', 'ambassadors',
    'astatine', 'supplant', 'gird', 'clockwork', 'foxes', 'rerouting',
    'divisional', 'bends', 'spacer', 'physiologically', 'exquisite',
    'concerts', 'unbridled', 'crossing', 'rock', 'leatherneck',
    'Fortescue', 'reloading', 'Laramie', 'Tim', 'forlorn', 'revert',
    'scarcer', 'spigot', 'equality', 'paranormal', 'aggrieves', 'pegs',
    'committeewomen', 'documented', 'interrupt', 'emerald', 'Battelle',
    'reconverted', 'anticipated', 'prejudices', 'drowsiness',
    'trivialities', 'food', 'blackberries', 'Cyclades', 'tourist',
    'branching', 'nugget', 'Asilomar', 'repairmen', 'Cowan',
    'receptacles', 'nobler', 'Nebraskan', 'territorial', 'chickadee',
    'bedbug', 'darted', 'vigilance', 'Octavia', 'summands', 'policemen',
    'twirls', 'style', 'outlawing', 'specifiable', 'pang', 'Orpheus',
    'epigram', 'Babel', 'butyrate', 'wishing', 'fiendish', 'accentuate',
    'much', 'pulsed', 'adorned', 'arbiters', 'counted', 'Afrikaner',
    'parameterizes', 'agenda', 'Americanism', 'referenda', 'derived',
    'liquidity', 'trembling', 'lordly', 'Agway', 'Dillon', 'propellers',
    'statement', 'stickiest', 'thankfully', 'autograph', 'parallel',
    'impulse', 'Hamey', 'stylistic', 'disproved', 'inquirer', 'hoisting',
    'residues', 'variant', 'colonials', 'dequeued', 'especial', 'Samoa',
    'Polaris', 'dismisses', 'surpasses', 'prognosis', 'urinates',
    'leaguers', 'ostriches', 'calculative', 'digested', 'divided',
    'reconfigurer', 'Lakewood', 'illegalities', 'redundancy',
    'approachability', 'masterly', 'cookery', 'crystallized', 'Dunham',
    'exclaims', 'mainline', 'Australianizes', 'nationhood', 'pusher',
    'ushers', 'paranoia', 'workstations', 'radiance', 'impedes',
    'Minotaur', 'cataloging', 'bites', 'fashioning', 'Alsop', 'servants',
    'Onondaga', 'paragraph', 'leadings', 'clients', 'Latrobe',
    'Cornwallis', 'excitingly', 'calorimetric', 'savior', 'tandem',
    'antibiotics', 'excuse', 'brushy', 'selfish', 'naive', 'becomes',
    'towers', 'popularizes', 'engender', 'introducing', 'possession',
    'slaughtered', 'marginally', 'Packards', 'parabola', 'utopia',
    'automata', 'deterrent', 'chocolates', 'objectives', 'clannish',
    'aspirin', 'ferociousness', 'primarily', 'armpit', 'handfuls',
    'dangle', 'Manila', 'enlivened', 'decrease', 'phylum', 'hardy',
    'objectively', 'baskets', 'chaired', 'Sepoy', 'deputy', 'blizzard',
    'shootings', 'breathtaking', 'sticking', 'initials', 'epitomized',
    'Forrest', 'cellular', 'amatory', 'radioed', 'horrified', 'Neva',
    'simultaneous', 'delimiter', 'expulsion', 'Himmler', 'contradiction',
    'Remus', 'Franklinizations', 'luggage', 'moisture', 'Jews',
    'comptroller', 'brevity', 'contradictions', 'Ohio', 'active',
    'babysit', 'China', 'youngest', 'superstition', 'clawing',
    'raccoons', 'chose', 'shoreline', 'helmets', 'Jeffersonian',
    'papered', 'kindergarten', 'reply', 'succinct', 'split', 'wriggle',
    'suitcases', 'nonce', 'grinders', 'anthem', 'showcase', 'maimed',
    'blue', 'obeys', 'unreported', 'perusing', 'recalculate', 'rancher',
    'demonic', 'Lilliputianize', 'approximation', 'repents',
    'yellowness', 'irritates', 'Ferber', 'flashlights', 'booty',
    'Neanderthal', 'someday', 'foregoes', 'lingering', 'cloudiness',
    'guy', 'consumer', 'Berkowitz', 'relics', 'interpolating',
    'reappearing', 'advisements', 'Nolan', 'turrets', 'skeletal',
    'skills', 'mammas', 'Winsett', 'wheelings', 'stiffen', 'monkeys',
    'plainness', 'braziers', 'Leary', 'advisee', 'jack', 'verb',
    'reinterpret', 'geometrical', 'trolleys', 'arboreal', 'overpowered',
    'Cuzco', 'poetical', 'admirations', 'Hobbes', 'phonemes',
    'Newsweek', 'agitator', 'finally', 'prophets', 'environment',
    'easterners', 'precomputed', 'faults', 'rankly', 'swallowing',
    'crawl', 'trolley', 'spreading', 'resourceful', 'go', 'demandingly',
    'broader', 'spiders', 'Marsha', 'debris', 'operates', 'Dundee',
    'alleles', 'crunchier', 'quizzical', 'hanging', 'Fisk']
wordsd = {}
for word in words:
    wordsd[word] = 1
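# wordsd is a dict used as a set for O(1) membership tests in edit();
# the script predates Python's built-in set type.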
def collect_options(args, jobs, options):

    while args:
        arg = args.pop(0)
        if arg.startswith('-'):
            name = arg[1:]
            if name == 'options':
                fname = args.pop(0)
                d = {}
                execfile(fname, d)
                collect_options(list(d['options']), jobs, options)
            elif options.has_key(name):
                v = args.pop(0)
                if options[name] != None:
                    raise ValueError(
                        "Duplicate values for %s, %s and %s"
                        % (name, v, options[name])
                        )
                options[name] = v
            elif name == 'setup':
                options['setup'] = 1
            elif globals().has_key(name.capitalize() + 'Job'):
                job = name
                kw = {}
                while args and args[0].find("=") > 0:
                    arg = args.pop(0).split('=')
                    name, v = arg[0], '='.join(arg[1:])
                    if kw.has_key(name):
                        raise ValueError(
                            "Duplicate parameter %s for job %s"
                            % (name, job)
                            )
                    kw[name] = v
                if kw.has_key('frequency'):
                    frequency = kw['frequency']
                    del kw['frequency']
                else:
                    frequency = 1

                if kw.has_key('sleep'):
                    sleep = float(kw['sleep'])
                    del kw['sleep']
                else:
                    sleep = 0.0001

                if kw.has_key('repeat'):
                    repeatp = float(kw['repeat'])
                    del kw['repeat']
                else:
                    repeatp = 0

                jobs.append((job, kw, frequency, sleep, repeatp))
            else:
                raise ValueError("not an option or job", name)
        else:
            raise ValueError("Expected an option", arg)
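# A worked example (hypothetical paths): the argument list
#   ['-mbox', '/tmp/mail.mbox', '-edit', 'frequency=4', 'sleep=0.1']
# leaves options == {'mbox': '/tmp/mail.mbox', 'threads': None} and
# appends ('edit', {}, '4', 0.1, 0) to jobs.  Note that frequency is
# kept as a string while sleep and repeat are coerced to float.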
def find_lib_python():
    for b in os.getcwd(), os.path.split(sys.argv[0])[0]:
        for i in range(6):
            d = ['..'] * i + ['lib', 'python']
            p = os.path.join(b, *d)
            if os.path.isdir(p):
                return p
    raise ValueError("Couldn't find lib/python")
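# find_lib_python() tries ./lib/python, ../lib/python, and so on, up
# to five levels above both the current directory and the script's own
# directory, returning the first directory that exists.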
def main(args=None):
    lib_python = find_lib_python()
    sys.path.insert(0, lib_python)

    if args is None:
        args = sys.argv[1:]
    if not args:
        print __doc__
        sys.exit(0)

    print args
    random.seed(hash(tuple(args)))  # always use the same for the given args

    options = {"mbox": None, "threads": None}
    jobdefs = []
    collect_options(args, jobdefs, options)

    mboxes = {}
    if options["mbox"]:
        mboxes[options["mbox"]] = MBox(options["mbox"])

    if options.has_key('setup'):
        setup(lib_python)
    else:
        import Zope
        Zope.startup()

    #from ThreadedAsync.LoopCallback import loop
    #threading.Thread(target=loop, args=(), name='asyncore').start()

    jobs = JobProducer()
    for job, kw, frequency, sleep, repeatp in jobdefs:
        Job = globals()[job.capitalize() + 'Job']
        if getattr(Job, 'needs_mbox', 0):
            if not kw.has_key("mbox"):
                if not options["mbox"]:
                    raise ValueError("no mailbox (mbox option) file specified")
                kw['mbox'] = mboxes[options["mbox"]]
            else:
                if not mboxes.has_key(kw["mbox"]):
                    mboxes[kw['mbox']] = MBox(kw['mbox'])
                kw["mbox"] = mboxes[kw['mbox']]
        jobs.add(Job(**kw), frequency, sleep, repeatp)

    if not jobs:
        print "No jobs to execute"
        return

    threads = int(options['threads'] or '0')

    if threads > 1:
        threads = [threading.Thread(target=run, args=(jobs, i), name=str(i))
                   for i in range(threads)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
    else:
        run(jobs)

if __name__ == '__main__':
    main()
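# A hypothetical invocation, assuming a Unix mailbox at /tmp/mail.mbox:
#   python zodbload.py -mbox /tmp/mail.mbox -threads 4 \
#       -index number=2 frequency=10 -search frequency=1
# This indexes two messages per transaction and mixes in occasional
# catalog searches, spread across four worker threads.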