Commit 643a9757 authored by root

Use tagged version of src/scripts

parent b0e88871
This directory contains a collection of utilities for managing ZODB
databases. Some are more useful than others. If you install ZODB
using distutils ("python setup.py install"), fsdump.py, fstest.py,
repozo.py, and zeopack.py will be installed in /usr/local/bin.
Unless otherwise noted, these scripts are invoked with the name of the
Data.fs file as their only argument. Example: checkbtrees.py data.fs.
analyze.py -- a transaction analyzer for FileStorage
Reports on the data in a FileStorage. The report is organized by
class. It shows total data, as well as separate reports for current
and historical revisions of objects.
checkbtrees.py -- checks BTrees in a FileStorage for corruption
Attempts to find all the BTrees contained in a Data.fs, calls their
_check() methods, and runs them through BTrees.check.check().
fsdump.py -- summarize FileStorage contents, one line per revision
Prints a report of FileStorage contents, with one line for each
transaction and one line for each data record in that transaction.
Includes time stamps, file positions, and class names.
fstest.py -- simple consistency checker for FileStorage
usage: fstest.py [-v] data.fs
The fstest tool will scan all the data in a FileStorage and report an
error if it finds any corrupt transaction data. The tool will print a
message when the first error is detected, then exit.
The tool accepts one or more -v arguments. If a single -v is used, it
will print a line of text for each transaction record it encounters.
If two -v arguments are used, it will also print a line of text for
each object. The objects for a transaction will be printed before the
transaction itself.
Note: It does not check the consistency of the object pickles. It is
possible for the damage to occur only in the part of the file that
stores object pickles. Those errors will go undetected.
space.py -- report space used by objects in a FileStorage
usage: space.py [-v] data.fs
This ignores revisions and versions.
netspace.py -- hackish attempt to report on size of objects
usage: netspace.py [-P | -v] data.fs
-P: do a pack first
-v: print info for all objects, even if a traversal path isn't found
Traverses objects from the database root and attempts to calculate the
size of each object, including all reachable subobjects.
parsezeolog.py -- parse BLATHER logs from ZEO server
This script may be obsolete. It has not been tested against the
current log output of the ZEO server.
Reports on the time and size of transactions committed by a ZEO
server, by inspecting log messages at BLATHER level.
repozo.py -- incremental backup utility for FileStorage
Run the script with the -h option to see usage details.
timeout.py -- script to test transaction timeout
usage: timeout.py address delay [storage-name]
This script connects to a storage, begins a transaction, calls store()
and tpc_vote(), and then sleeps forever. This should trigger the
transaction timeout feature of the server.
zeopack.py -- pack a ZEO server
The script connects to a server and calls pack() on a specific
storage. See the script for usage details.
zeoreplay.py -- experimental script to replay transactions from a ZEO log
Like parsezeolog.py, this may be obsolete because it was written
against an earlier version of the ZEO server. See the script for
usage details.
zeoup.py -- check that a ZEO server is up and responding
usage: zeoup.py [options]
The test will connect to a ZEO server, load the root object, and
attempt to update the zeoup counter in the root. It will report
success if it updates the counter or if it gets a ConflictError. A
ConflictError is considered a success, because the client was able to
start a transaction.
See the script for details about the options.
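For illustration, the heart of the check amounts to something like the
sketch below (the address and counter key are made up; the real script
adds option parsing and error handling):
    from ZEO.ClientStorage import ClientStorage
    from ZODB import DB
    from ZODB.POSException import ConflictError
    cs = ClientStorage(('localhost', 9999))    # hypothetical address
    cn = DB(cs).open()
    root = cn.root()
    try:
        root['zeoup-counter'] = root.get('zeoup-counter', 0) + 1
        get_transaction().commit()             # ZODB 3.x transaction API
    except ConflictError:
        pass    # a conflict still shows the server accepted a transaction
    cn.close()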
zodbload.py -- exercise ZODB under a heavy synthesized Zope-like load
See the module docstring for details. Note that this script requires
Zope. New in ZODB3 3.1.4.
zeoserverlog.py -- analyze ZEO server log for performance statistics
See the module docstring for details; there are a large number of
options. New in ZODB3 3.1.4.
fsrefs.py -- check FileStorage for dangling references
fstail.py -- display the most recent transactions in a FileStorage
usage: fstail.py [-n ntxn] data.fs
The most recent ntxn transactions are displayed to stdout.
Optional argument -n specifies ntxn, and defaults to 10.
migrate.py -- do a storage migration and gather statistics
See the module docstring for details.
zeoqueue.py -- report number of clients currently waiting in the ZEO queue
See the module docstring for details.
#!/usr/bin/env python2.3
# Based on a transaction analyzer by Matt Kromer.
import pickle
import re
import sys
import types
from ZODB.FileStorage import FileStorage
class Report:
def __init__(self):
self.OIDMAP = {}
self.TYPEMAP = {}
self.TYPESIZE = {}
self.FREEMAP = {}
self.USEDMAP = {}
self.TIDS = 0
self.OIDS = 0
self.DBYTES = 0
self.COIDS = 0
self.CBYTES = 0
self.FOIDS = 0
self.FBYTES = 0
def shorten(s, n):
l = len(s)
if l <= n:
return s
while len(s) + 3 > n: # account for ...
i = s.find(".")
if i == -1:
# In the worst case, just return the rightmost n bytes
return s[-n:]
else:
s = s[i + 1:]
l = len(s)
return "..." + s
def report(rep):
print "Processed %d records in %d transactions" % (rep.OIDS, rep.TIDS)
print "Average record size is %7.2f bytes" % (rep.DBYTES * 1.0 / rep.OIDS)
print ("Average transaction size is %7.2f bytes" %
(rep.DBYTES * 1.0 / rep.TIDS))
print "Types used:"
fmt = "%-46s %7s %9s %6s %7s"
fmtp = "%-46s %7d %9d %5.1f%% %7.2f" # per-class format
fmts = "%46s %7d %8dk %5.1f%% %7.2f" # summary format
print fmt % ("Class Name", "Count", "TBytes", "Pct", "AvgSize")
print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7)
typemap = rep.TYPEMAP.keys()
typemap.sort()
cumpct = 0.0
for t in typemap:
pct = rep.TYPESIZE[t] * 100.0 / rep.DBYTES
cumpct += pct
print fmtp % (shorten(t, 46), rep.TYPEMAP[t], rep.TYPESIZE[t],
pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t])
print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7)
print "%46s %7d %9s %6s %6.2fk" % ('Total Transactions', rep.TIDS, ' ',
' ', rep.DBYTES * 1.0 / rep.TIDS / 1024.0)
print fmts % ('Total Records', rep.OIDS, rep.DBYTES / 1024.0, cumpct,
rep.DBYTES * 1.0 / rep.OIDS)
print fmts % ('Current Objects', rep.COIDS, rep.CBYTES / 1024.0,
rep.CBYTES * 100.0 / rep.DBYTES,
rep.CBYTES * 1.0 / rep.COIDS)
if rep.FOIDS:
print fmts % ('Old Objects', rep.FOIDS, rep.FBYTES / 1024.0,
rep.FBYTES * 100.0 / rep.DBYTES,
rep.FBYTES * 1.0 / rep.FOIDS)
def analyze(path):
fs = FileStorage(path, read_only=1)
fsi = fs.iterator()
report = Report()
while 1:
try:
transaction = fsi.next()
except IndexError:
break
analyze_trans(report, transaction)
return report
def analyze_trans(report, txn):
report.TIDS += 1
while 1:
try:
rec = txn.next()
except IndexError:
break
analyze_rec(report, rec)
def get_type(record):
try:
classinfo = pickle.loads(record.data)[0]
except SystemError, err:
s = str(err)
mo = re.match('Failed to import class (\S+) from module (\S+)', s)
if mo is None:
raise
else:
klass, mod = mo.group(1, 2)
return "%s.%s" % (mod, klass)
if isinstance(classinfo, types.TupleType):
mod, klass = classinfo
return "%s.%s" % (mod, klass)
else:
return str(classinfo)
def analyze_rec(report, record):
oid = record.oid
report.OIDS += 1
try:
size = len(record.data) # Ignores various overhead
report.DBYTES += size
if not report.OIDMAP.has_key(oid):
type = get_type(record)
report.OIDMAP[oid] = type
report.USEDMAP[oid] = size
report.COIDS += 1
report.CBYTES += size
else:
type = report.OIDMAP[oid]
fsize = report.USEDMAP[oid]
report.FREEMAP[oid] = report.FREEMAP.get(oid, 0) + fsize
report.USEDMAP[oid] = size
report.FOIDS += 1
report.FBYTES += fsize
report.CBYTES += size - fsize
report.TYPEMAP[type] = report.TYPEMAP.get(type, 0) + 1
report.TYPESIZE[type] = report.TYPESIZE.get(type, 0) + size
except Exception, err:
print err
if __name__ == "__main__":
path = sys.argv[1]
report(analyze(path))
#!/usr/bin/env python2.3
"""Check the consistency of BTrees in a Data.fs
usage: checkbtrees.py data.fs
Try to find all the BTrees in a Data.fs, call their _check() methods,
and run them through BTrees.check.check().
"""
from types import IntType
import ZODB
from ZODB.FileStorage import FileStorage
from BTrees.check import check
# Set of oids we've already visited. Since the object structure is
# a general graph, this is needed to prevent unbounded paths in the
# presence of cycles. It's also helpful in eliminating redundant
# checking when a BTree is pointed to by many objects.
oids_seen = {}
# Append (obj, path) to L if and only if obj is a persistent object
# and we haven't seen it before.
def add_if_new_persistent(L, obj, path):
global oids_seen
getattr(obj, '_', None) # unghostify
if hasattr(obj, '_p_oid'):
oid = obj._p_oid
if not oids_seen.has_key(oid):
L.append((obj, path))
oids_seen[oid] = 1
def get_subobjects(obj):
getattr(obj, '_', None) # unghostify
sub = []
try:
attrs = obj.__dict__.items()
except AttributeError:
attrs = ()
for pair in attrs:
sub.append(pair)
# what if it is a mapping?
try:
items = obj.items()
except AttributeError:
items = ()
for k, v in items:
if not isinstance(k, IntType):
sub.append(("<key>", k))
if not isinstance(v, IntType):
sub.append(("[%s]" % repr(k), v))
# what if it is a sequence?
i = 0
while 1:
try:
elt = obj[i]
except:
break
sub.append(("[%d]" % i, elt))
i += 1
return sub
def main(fname):
fs = FileStorage(fname, read_only=1)
cn = ZODB.DB(fs).open()
rt = cn.root()
todo = []
add_if_new_persistent(todo, rt, '')
found = 0
while todo:
obj, path = todo.pop(0)
found += 1
if not path:
print "<root>", repr(obj)
else:
print path, repr(obj)
mod = str(obj.__class__.__module__)
if mod.startswith("BTrees"):
if hasattr(obj, "_check"):
try:
obj._check()
except AssertionError, msg:
print "*" * 60
print msg
print "*" * 60
try:
check(obj)
except AssertionError, msg:
print "*" * 60
print msg
print "*" * 60
if found % 100 == 0:
cn.cacheMinimize()
for k, v in get_subobjects(obj):
if k.startswith('['):
# getitem
newpath = "%s%s" % (path, k)
else:
newpath = "%s.%s" % (path, k)
add_if_new_persistent(todo, v, newpath)
print "total", len(fs._index), "found", found
if __name__ == "__main__":
import sys
try:
fname, = sys.argv[1:]
except:
print __doc__
sys.exit(2)
main(fname)
#!/usr/bin/env python2.3
"""Print a text summary of the contents of a FileStorage."""
from ZODB.FileStorage.fsdump import fsdump
if __name__ == "__main__":
import sys
fsdump(sys.argv[1])
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Check FileStorage for dangling references.
usage: fsrefs.py [-v] data.fs
This script ignores versions, which might produce incorrect results
for storages that use versions.
"""
from ZODB.FileStorage import FileStorage
from ZODB.TimeStamp import TimeStamp
from ZODB.utils import u64
from ZODB.FileStorage.fsdump import get_pickle_metadata
import cPickle
import cStringIO
import traceback
import types
VERBOSE = 0
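# get_refs() relies on two cPickle behaviors: when persistent_load is a
# list, the unpickler appends each persistent reference it encounters to
# that list, and noload() scans a pickle without constructing objects.
# A FileStorage data record holds two concatenated pickles (class metadata
# followed by object state), hence the two noload() calls.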
def get_refs(pickle):
refs = []
f = cStringIO.StringIO(pickle)
u = cPickle.Unpickler(f)
u.persistent_load = refs
u.noload()
u.noload()
return refs
def report(oid, data, serial, fs, missing):
from_mod, from_class = get_pickle_metadata(data)
if len(missing) > 1:
plural = "s"
else:
plural = ""
ts = TimeStamp(serial)
print "oid %s %s.%s" % (hex(u64(oid)), from_mod, from_class)
print "last updated: %s, tid=%s" % (ts, hex(u64(serial)))
print "refers to invalid object%s:" % plural
for oid, info, reason in missing:
if isinstance(info, types.TupleType):
description = "%s.%s" % info
else:
description = str(info)
print "\toid %s %s: %r" % (hex(u64(oid)), reason, description)
print
def main(path):
fs = FileStorage(path, read_only=1)
noload = {}
for oid in fs._index.keys():
try:
data, serial = fs.load(oid, "")
except:
print "oid %s failed to load" % hex(u64(oid))
if VERBOSE:
traceback.print_exc()
noload[oid] = 1
# XXX If we get here after we've already loaded objects
# that refer to this one, we won't get error reports from
# them. We could fix this by making two passes over the
# storage, but that seems like overkill.
continue  # can't analyze references of an object that failed to load
refs = get_refs(data)
missing = [] # contains 3-tuples of oid, klass-metadata, reason
for info in refs:
try:
ref, klass = info
except (ValueError, TypeError):
# failed to unpack
ref = info
klass = '<unknown>'
if not fs._index.has_key(ref):
missing.append((ref, klass, "missing"))
if noload.has_key(ref):
missing.append((ref, klass, "failed to load"))
if missing:
report(oid, data, serial, fs, missing)
if __name__ == "__main__":
import sys
import getopt
opts, args = getopt.getopt(sys.argv[1:], "v")
for k, v in opts:
if k == "-v":
VERBOSE += 1
path, = args
main(path)
#!/usr/bin/env python2.3
"""Print details statistics from fsdump output."""
import re
import sys
rx_txn = re.compile("tid=([0-9a-f]+).*size=(\d+)")
rx_data = re.compile("oid=([0-9a-f]+) class=(\S+) size=(\d+)")
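# These patterns pick apart fsdump output: transaction lines start with
# "Trans" and include tid=<hex> and size=<bytes>; the indented data lines
# include oid=<hex>, class=<dotted name>, and size=<bytes>.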
def sort_byhsize(seq, reverse=False):
L = [(v.size(), k, v) for k, v in seq]
L.sort()
if reverse:
L.reverse()
return [(k, v) for n, k, v in L]
class Histogram(dict):
def add(self, size):
self[size] = self.get(size, 0) + 1
def size(self):
return sum(self.itervalues())
def mean(self):
product = sum([k * v for k, v in self.iteritems()])
return product / self.size()
def median(self):
# close enough?
n = self.size() / 2
L = self.keys()
L.sort()
L.reverse()
while 1:
k = L.pop()
if self[k] > n:
return k
n -= self[k]
def mode(self):
mode = 0
value = 0
for k, v in self.iteritems():
if v > value:
value = v
mode = k
return mode
def make_bins(self, binsize):
maxkey = max(self.iterkeys())
self.binsize = binsize
self.bins = [0] * (1 + maxkey / binsize)
for k, v in self.iteritems():
b = k / binsize
self.bins[b] += v
def report(self, name, binsize=50, usebins=False, gaps=True, skip=True):
if usebins:
# Use existing bins with whatever size they have
binsize = self.binsize
else:
# Make new bins
self.make_bins(binsize)
maxval = max(self.bins)
# Print up to 40 dots for a value
dot = max(maxval / 40, 1)
tot = sum(self.bins)
print name
print "Total", tot,
print "Median", self.median(),
print "Mean", self.mean(),
print "Mode", self.mode(),
print "Max", max(self)
print "One * represents", dot
gap = False
cum = 0
for i, n in enumerate(self.bins):
if gaps and (not n or (skip and not n / dot)):
if not gap:
print " ..."
gap = True
continue
gap = False
p = 100 * n / tot
cum += n
pc = 100 * cum / tot
print "%6d %6d %3d%% %3d%% %s" % (
i * binsize, n, p, pc, "*" * (n / dot))
print
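# A tiny worked example of the Histogram class (hypothetical sizes):
#   h = Histogram(); h.add(100); h.add(100); h.add(400)
#   then h.size() == 3, h.mean() == 200, h.median() == 100, h.mode() == 100.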
def class_detail(class_size):
# summary of classes
fmt = "%5s %6s %6s %6s %-50.50s"
labels = ["num", "median", "mean", "mode", "class"]
print fmt % tuple(labels)
print fmt % tuple(["-" * len(s) for s in labels])
for klass, h in sort_byhsize(class_size.iteritems()):
print fmt % (h.size(), h.median(), h.mean(), h.mode(), klass)
print
# per class details
for klass, h in sort_byhsize(class_size.iteritems(), reverse=True):
h.make_bins(50)
if len(filter(None, h.bins)) == 1:
continue
h.report("Object size for %s" % klass, usebins=True)
def revision_detail(lifetimes, classes):
# Report per-class details for any object modified more than once
for name, oids in classes.iteritems():
h = Histogram()
keep = False
for oid in dict.fromkeys(oids, 1):
L = lifetimes.get(oid)
n = len(L)
h.add(n)
if n > 1:
keep = True
if keep:
h.report("Number of revisions for %s" % name, binsize=10)
def main(path):
txn_objects = Histogram() # histogram of txn size in objects
txn_bytes = Histogram() # histogram of txn size in bytes
obj_size = Histogram() # histogram of object size
n_updates = Histogram() # oid -> num updates
n_classes = Histogram() # class -> num objects
lifetimes = {} # oid -> list of tids
class_size = {} # class -> histogram of object size
classes = {} # class -> list of oids
MAX = 0
tid = None
f = open(path, "rb")
for i, line in enumerate(f):
if MAX and i > MAX:
break
if line.startswith(" data"):
m = rx_data.search(line)
if not m:
continue
oid, klass, size = m.groups()
size = int(size)
obj_size.add(size)
n_updates.add(oid)
n_classes.add(klass)
h = class_size.get(klass)
if h is None:
h = class_size[klass] = Histogram()
h.add(size)
L = lifetimes.setdefault(oid, [])
L.append(tid)
L = classes.setdefault(klass, [])
L.append(oid)
objects += 1
elif line.startswith("Trans"):
if tid is not None:
txn_objects.add(objects)
m = rx_txn.search(line)
if not m:
continue
tid, size = m.groups()
size = int(size)
objects = 0
txn_bytes.add(size)
f.close()
print "Summary: %d txns, %d objects, %d revisions" % (
txn_objects.size(), len(n_updates), n_updates.size())
print
txn_bytes.report("Transaction size (bytes)", binsize=1024)
txn_objects.report("Transaction size (objects)", binsize=10)
obj_size.report("Object size", binsize=128)
# object lifetime info
h = Histogram()
for k, v in lifetimes.items():
h.add(len(v))
h.report("Number of revisions", binsize=10, skip=False)
# details about revisions
revision_detail(lifetimes, classes)
class_detail(class_size)
if __name__ == "__main__":
main(sys.argv[1])
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Tool to dump the last few transactions from a FileStorage."""
from ZODB.fstools import prev_txn
import binascii
import getopt
import sha
import sys
def main(path, ntxn):
f = open(path, "rb")
f.seek(0, 2)
th = prev_txn(f)
i = ntxn
while th and i > 0:
hash = sha.sha(th.get_raw_data()).digest()
l = len(str(th.get_timestamp())) + 1
th.read_meta()
print "%s: hash=%s" % (th.get_timestamp(),
binascii.hexlify(hash))
print ("user=%r description=%r length=%d"
% (th.user, th.descr, th.length))
print
th = th.prev_txn()
i -= 1
if __name__ == "__main__":
ntxn = 10
opts, args = getopt.getopt(sys.argv[1:], "n:")
path, = args
for k, v in opts:
if k == '-n':
ntxn = int(v)
main(path, ntxn)
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Simple consistency checker for FileStorage.
usage: fstest.py [-v] data.fs
The fstest tool will scan all the data in a FileStorage and report an
error if it finds any corrupt transaction data. The tool will print a
message when the first error is detected, then exit.
The tool accepts one or more -v arguments. If a single -v is used, it
will print a line of text for each transaction record it encounters.
If two -v arguments are used, it will also print a line of text for
each object. The objects for a transaction will be printed before the
transaction itself.
Note: It does not check the consistency of the object pickles. It is
possible for the damage to occur only in the part of the file that
stores object pickles. Those errors will go undetected.
"""
# The implementation is based closely on the read_index() function in
# ZODB.FileStorage. If anything about the FileStorage layout changes,
# this file will need to be updated.
import string
import struct
import sys
class FormatError(ValueError):
"""There is a problem with the format of the FileStorage."""
class Status:
checkpoint = 'c'
undone = 'u'
packed_version = 'FS21'
TREC_HDR_LEN = 23
DREC_HDR_LEN = 42
t32 = 1L << 32  # 2**32, referenced below when an extension length reads as negative
VERBOSE = 0
def hexify(s):
"""Format an 8-bite string as hex"""
l = []
for c in s:
h = hex(ord(c))
if h[:2] == '0x':
h = h[2:]
if len(h) == 1:
l.append("0")
l.append(h)
return "0x" + string.join(l, '')
def chatter(msg, level=1):
if VERBOSE >= level:
sys.stdout.write(msg)
def U64(v):
"""Unpack an 8-byte string as a 64-bit long"""
h, l = struct.unpack(">II", v)
if h:
return (h << 32) + l
else:
return l
def check(path):
file = open(path, 'rb')
file.seek(0, 2)
file_size = file.tell()
if file_size == 0:
raise FormatError("empty file")
file.seek(0)
if file.read(4) != packed_version:
raise FormatError("invalid file header")
pos = 4L
tid = '\000' * 8 # lowest possible tid to start
i = 0
while pos:
_pos = pos
pos, tid = check_trec(path, file, pos, tid, file_size)
if tid is not None:
chatter("%10d: transaction tid %s #%d \n" %
(_pos, hexify(tid), i))
i = i + 1
def check_trec(path, file, pos, ltid, file_size):
"""Read an individual transaction record from file.
Returns the pos of the next transaction and the transaction id.
It also leaves the file pointer set to pos. The path argument is
used for generating error messages.
"""
h = file.read(TREC_HDR_LEN)
if not h:
return None, None
if len(h) != TREC_HDR_LEN:
raise FormatError("%s truncated at %s" % (path, pos))
tid, stl, status, ul, dl, el = struct.unpack(">8s8scHHH", h)
if el < 0:
el = t32 - el
tmeta_len = TREC_HDR_LEN + ul + dl + el
if tid <= ltid:
raise FormatError("%s time-stamp reduction at %s: %s <= %s" %
(path, pos, hexify(tid), hexify(ltid)))
ltid = tid
tl = U64(stl) # transaction record length - 8
if pos + tl + 8 > file_size:
raise FormatError("%s truncated possibly because of"
" damaged records at %s" % (path, pos))
if status == Status.checkpoint:
raise FormatError("%s checkpoint flag was not cleared at %s"
% (path, pos))
if status not in ' up':
raise FormatError("%s has invalid status '%s' at %s" %
(path, status, pos))
if tmeta_len > tl:
raise FormatError("%s has an invalid transaction header"
" at %s" % (path, pos))
tpos = pos
tend = tpos + tl
if status != Status.undone:
pos = tpos + tmeta_len
file.read(ul + dl + el) # skip transaction metadata
i = 0
while pos < tend:
_pos = pos
pos, oid = check_drec(path, file, pos, tpos, tid)
if pos > tend:
raise FormatError("%s has data records that extend beyond"
" the transaction record; end at %s" %
(path, pos))
chatter("%10d: object oid %s #%d\n" % (_pos, hexify(oid), i),
level=2)
i = i + 1
file.seek(tend)
rtl = file.read(8)
if rtl != stl:
raise FormatError("%s has inconsistent transaction length"
" for undone transaction at %s" % (path, pos))
pos = tend + 8
return pos, tid
def check_drec(path, file, pos, tpos, tid):
"""Check a data record for the current transaction record"""
h = file.read(DREC_HDR_LEN)
if len(h) != DREC_HDR_LEN:
raise FormatError("%s truncated at %s" % (path, pos))
oid, serial, _prev, _tloc, vlen, _plen = (
struct.unpack(">8s8s8s8sH8s", h))
prev = U64(_prev)
tloc = U64(_tloc)
plen = U64(_plen)
dlen = DREC_HDR_LEN + (plen or 8)
if vlen:
dlen = dlen + 16 + vlen
file.seek(8, 1)
pv = U64(file.read(8))
file.seek(vlen, 1) # skip the version data
if tloc != tpos:
raise FormatError("%s data record exceeds transaction record "
"at %s: tloc %d != tpos %d" %
(path, pos, tloc, tpos))
pos = pos + dlen
# XXX is the following code necessary?
if plen:
file.seek(plen, 1)
else:
file.seek(8, 1)
# XXX _loadBack() ?
return pos, oid
def usage():
print __doc__
sys.exit(-1)
if __name__ == "__main__":
import getopt
try:
opts, args = getopt.getopt(sys.argv[1:], 'v')
if len(args) != 1:
raise ValueError, "expected one argument"
for k, v in opts:
if k == '-v':
VERBOSE = VERBOSE + 1
except (getopt.error, ValueError):
usage()
try:
check(args[0])
except FormatError, msg:
print msg
sys.exit(-1)
chatter("no errors detected")
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2001, 2002, 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""A script to gather statistics while doing a storage migration.
This is very similar to a standard storage's copyTransactionsFrom() method,
except that it's geared to run as a script, and it collects useful pieces of
information as it's working. This script can be used to stress test a storage
since it blasts transactions at it as fast as possible. You can get a good
sense of the performance of a storage by running this script.
Actually it just counts the size of pickles in the transaction via the
iterator protocol, so storage overheads aren't counted.
Usage: %(PROGRAM)s [options] [source-storage-args] [destination-storage-args]
Options:
-S sourcetype
--stype=sourcetype
This is the name of a recognized type for the source database. Use -T
to print out the known types. Defaults to "file".
-D desttype
--dtype=desttype
This is the name of the recognized type for the destination database.
Use -T to print out the known types. Defaults to "file".
-o filename
--output=filename
Print results in filename, otherwise stdout.
-m txncount
--max=txncount
Stop after committing txncount transactions.
-k txncount
--skip=txncount
Skip the first txncount transactions.
-p/--profile
Turn on specialized profiling.
-t/--timestamps
Print tids as timestamps.
-T/--storage_types
Print all the recognized storage types and exit.
-v/--verbose
Turns on verbose output. Multiple -v options increase the verbosity.
-h/--help
Print this message and exit.
Positional arguments:
source-storage-args:
Semicolon separated list of arguments for the source storage, as
key=val pairs. E.g. "file_name=Data.fs;read_only=1"
destination-storage-args:
Semicolon separated list of arguments for the destination storage, as
key=val pairs. E.g. "name=full;frequency=3600"
"""
import re
import sys
import time
import getopt
import marshal
import profile
from ZODB import utils
from ZODB import StorageTypes
from ZODB.TimeStamp import TimeStamp
PROGRAM = sys.argv[0]
ZERO = '\0'*8
try:
True, False
except NameError:
True = 1
False = 0
def usage(code, msg=''):
print >> sys.stderr, __doc__ % globals()
if msg:
print >> sys.stderr, msg
sys.exit(code)
def error(code, msg):
print >> sys.stderr, msg
print "use --help for usage message"
sys.exit(code)
def main():
try:
opts, args = getopt.getopt(
sys.argv[1:],
'hvo:pm:k:D:S:Tt',
['help', 'verbose',
'output=', 'profile', 'storage_types',
'max=', 'skip=', 'dtype=', 'stype=', 'timestamps'])
except getopt.error, msg:
error(2, msg)
class Options:
stype = 'FileStorage'
dtype = 'FileStorage'
verbose = 0
outfile = None
profilep = False
maxtxn = -1
skiptxn = -1
timestamps = False
options = Options()
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt in ('-v', '--verbose'):
options.verbose += 1
elif opt in ('-T', '--storage_types'):
print_types()
sys.exit(0)
elif opt in ('-S', '--stype'):
options.stype = arg
elif opt in ('-D', '--dtype'):
options.dtype = arg
elif opt in ('-o', '--output'):
options.outfile = arg
elif opt in ('-p', '--profile'):
options.profilep = True
elif opt in ('-m', '--max'):
options.maxtxn = int(arg)
elif opt in ('-k', '--skip'):
options.skiptxn = int(arg)
elif opt in ('-t', '--timestamps'):
options.timestamps = True
if len(args) > 2:
error(2, "too many arguments")
srckws = {}
if len(args) > 0:
srcargs = args[0]
for kv in re.split(r';\s*', srcargs):
key, val = kv.split('=')
srckws[key] = val
destkws = {}
if len(args) > 1:
destargs = args[1]
for kv in re.split(r';\s*', destargs):
key, val = kv.split('=')
destkws[key] = val
if options.stype not in StorageTypes.storage_types.keys():
usage(2, 'Source database type must be provided')
if options.dtype not in StorageTypes.storage_types.keys():
usage(2, 'Destination database type must be provided')
# Open the output file
if options.outfile is None:
options.outfp = sys.stdout
options.outclosep = False
else:
options.outfp = open(options.outfile, 'w')
options.outclosep = True
if options.verbose > 0:
print 'Opening source database...'
modname, sconv = StorageTypes.storage_types[options.stype]
kw = sconv(**srckws)
__import__(modname)
sclass = getattr(sys.modules[modname], options.stype)
srcdb = sclass(**kw)
if options.verbose > 0:
print 'Opening destination database...'
modname, dconv = StorageTypes.storage_types[options.dtype]
kw = dconv(**destkws)
__import__(modname)
dclass = getattr(sys.modules[modname], options.dtype)
dstdb = dclass(**kw)
try:
t0 = time.time()
doit(srcdb, dstdb, options)
t1 = time.time()
if options.verbose > 0:
print 'Migration time: %8.3f' % (t1-t0)
finally:
# Done
srcdb.close()
dstdb.close()
if options.outclosep:
options.outfp.close()
def doit(srcdb, dstdb, options):
outfp = options.outfp
profilep = options.profilep
verbose = options.verbose
# some global information
largest_pickle = 0
largest_txn_in_size = 0
largest_txn_in_objects = 0
total_pickle_size = 0L
total_object_count = 0
# Ripped from BaseStorage.copyTransactionsFrom()
ts = None
ok = True
prevrevids = {}
counter = 0
skipper = 0
if options.timestamps:
print "%4s. %26s %6s %8s %5s %5s %5s %5s %5s" % (
"NUM", "TID AS TIMESTAMP", "OBJS", "BYTES",
# Does anybody know what these times mean?
"t4-t0", "t1-t0", "t2-t1", "t3-t2", "t4-t3")
else:
print "%4s. %20s %6s %8s %6s %6s %6s %6s %6s" % (
"NUM", "TRANSACTION ID", "OBJS", "BYTES",
# Does anybody know what these times mean?
"t4-t0", "t1-t0", "t2-t1", "t3-t2", "t4-t3")
for txn in srcdb.iterator():
skipper += 1
if skipper <= options.skiptxn:
continue
counter += 1
if counter > options.maxtxn >= 0:
break
tid = txn.tid
if ts is None:
ts = TimeStamp(tid)
else:
t = TimeStamp(tid)
if t <= ts:
if ok:
print >> sys.stderr, \
'Time stamps are out of order %s, %s' % (ts, t)
ok = False
ts = t.laterThan(ts)
tid = `ts`
else:
ts = t
if not ok:
print >> sys.stderr, \
'Time stamps are back in order %s' % t
ok = True
if verbose > 1:
print ts
prof = None
if profilep and (counter % 100) == 0:
prof = profile.Profile()
objects = 0
size = 0
newrevids = RevidAccumulator()
t0 = time.time()
dstdb.tpc_begin(txn, tid, txn.status)
t1 = time.time()
for r in txn:
oid = r.oid
objects += 1
thissize = len(r.data)
size += thissize
if thissize > largest_pickle:
largest_pickle = thissize
if verbose > 1:
if not r.version:
vstr = 'norev'
else:
vstr = r.version
print utils.U64(oid), vstr, len(r.data)
oldrevid = prevrevids.get(oid, ZERO)
result = dstdb.store(oid, oldrevid, r.data, r.version, txn)
newrevids.store(oid, result)
t2 = time.time()
result = dstdb.tpc_vote(txn)
t3 = time.time()
newrevids.tpc_vote(result)
prevrevids.update(newrevids.get_dict())
# Profile every 100 transactions
if prof:
prof.runcall(dstdb.tpc_finish, txn)
else:
dstdb.tpc_finish(txn)
t4 = time.time()
# record the results
if objects > largest_txn_in_objects:
largest_txn_in_objects = objects
if size > largest_txn_in_size:
largest_txn_in_size = size
if options.timestamps:
tidstr = str(TimeStamp(tid))
format = "%4d. %26s %6d %8d %5.3f %5.3f %5.3f %5.3f %5.3f"
else:
tidstr = utils.U64(tid)
format = "%4d. %20s %6d %8d %6.4f %6.4f %6.4f %6.4f %6.4f"
print >> outfp, format % (skipper, tidstr, objects, size,
t4-t0, t1-t0, t2-t1, t3-t2, t4-t3)
total_pickle_size += size
total_object_count += objects
if prof:
prof.create_stats()
fp = open('profile-%02d.txt' % (counter / 100), 'wb')
marshal.dump(prof.stats, fp)
fp.close()
print >> outfp, "Largest pickle: %8d" % largest_pickle
print >> outfp, "Largest transaction: %8d" % largest_txn_in_size
print >> outfp, "Largest object count: %8d" % largest_txn_in_objects
print >> outfp, "Total pickle size: %14d" % total_pickle_size
print >> outfp, "Total object count: %8d" % total_object_count
# helper to deal with differences between old-style store() return and
# new-style store() return that supports ZEO
import types
class RevidAccumulator:
def __init__(self):
self.data = {}
def _update_from_list(self, list):
for oid, serial in list:
if not isinstance(serial, types.StringType):
raise serial
self.data[oid] = serial
def store(self, oid, result):
if isinstance(result, types.StringType):
self.data[oid] = result
elif result is not None:
self._update_from_list(result)
def tpc_vote(self, result):
if result is not None:
self._update_from_list(result)
def get_dict(self):
return self.data
if __name__ == '__main__':
main()
#!/usr/bin/env python2.3
"""Report on the net size of objects counting subobjects.
usage: netspace.py [-P | -v] data.fs
-P: do a pack first
-v: print info for all objects, even if a traversal path isn't found
"""
import ZODB
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata
from ZODB.referencesf import referencesf
def find_paths(root, maxdist):
"""Find Python attribute traversal paths for objects to maxdist distance.
Starting at a root object, traverse attributes up to distance levels
from the root, looking for persistent objects. Return a dict
mapping oids to traversal paths.
XXX Assumes that the keys of the root are not themselves
persistent objects.
XXX Doesn't traverse containers.
"""
paths = {}
# Handle the root as a special case because it's a dict
objs = []
for k, v in root.items():
oid = getattr(v, '_p_oid', None)
objs.append((k, v, oid, 0))
for path, obj, oid, dist in objs:
if oid is not None:
paths[oid] = path
if dist < maxdist:
getattr(obj, 'foo', None) # unghostify
try:
items = obj.__dict__.items()
except AttributeError:
continue
for k, v in items:
oid = getattr(v, '_p_oid', None)
objs.append(("%s.%s" % (path, k), v, oid, dist + 1))
return paths
def main(path):
fs = FileStorage(path, read_only=1)
if PACK:
fs.pack()
db = ZODB.DB(fs)
rt = db.open().root()
paths = find_paths(rt, 3)
def total_size(oid):
cache = {}
cache_size = 1000
def _total_size(oid, seen):
v = cache.get(oid)
if v is not None:
return v
data, serialno = fs.load(oid, '')
size = len(data)
for suboid in referencesf(data):
if seen.has_key(suboid):
continue
seen[suboid] = 1
size += _total_size(suboid, seen)
cache[oid] = size
if len(cache) == cache_size:
cache.popitem()
return size
return _total_size(oid, {})
keys = fs._index.keys()
keys.sort()
keys.reverse()
if not VERBOSE:
# If not running verbosely, don't print an entry for an object
# unless it has an entry in paths.
keys = filter(paths.has_key, keys)
fmt = "%8s %5d %8d %s %s.%s"
for oid in keys:
data, serialno = fs.load(oid, '')
mod, klass = get_pickle_metadata(data)
refs = referencesf(data)
path = paths.get(oid, '-')
print fmt % (U64(oid), len(data), total_size(oid), path, mod, klass)
if __name__ == "__main__":
import sys
import getopt
PACK = 0
VERBOSE = 0
try:
opts, args = getopt.getopt(sys.argv[1:], 'Pv')
path, = args
except getopt.error, err:
print err
print __doc__
sys.exit(2)
except ValueError:
print "expected one argument, got", len(args)
print __doc__
sys.exit(2)
for o, v in opts:
if o == '-P':
PACK = 1
if o == '-v':
VERBOSE += 1
main(path)
#!/usr/bin/env python2.3
"""Parse the BLATHER logging generated by ZEO2.
An example of the log format is:
2002-04-15T13:05:29 BLATHER(-100) ZEO Server storea(3235680, [714], 235339406490168806) ('10.0.26.30', 45514)
"""
import re
import time
rx_time = re.compile('(\d\d\d\d-\d\d-\d\d)T(\d\d:\d\d:\d\d)')
def parse_time(line):
"""Return the time portion of a zLOG line in seconds or None."""
mo = rx_time.match(line)
if mo is None:
return None
date, time_ = mo.group(1, 2)
date_l = [int(elt) for elt in date.split('-')]
time_l = [int(elt) for elt in time_.split(':')]
return int(time.mktime(date_l + time_l + [0, 0, 0]))
rx_meth = re.compile("zrpc:\d+ calling (\w+)\((.*)")
def parse_method(line):
pass
def parse_line(line):
"""Parse a log entry and return time, method info, and client."""
t = parse_time(line)
if t is None:
return None, None
mo = rx_meth.search(line)
if mo is None:
return None, None
meth_name = mo.group(1)
meth_args = mo.group(2).strip()
if meth_args.endswith(')'):
meth_args = meth_args[:-1]
meth_args = [s.strip() for s in meth_args.split(",")]
m = meth_name, tuple(meth_args)
return t, m
class TStats:
counter = 1
def __init__(self):
self.id = TStats.counter
TStats.counter += 1
fields = ("time", "vote", "done", "user", "path")
fmt = "%-24s %5s %5s %-15s %s"
hdr = fmt % fields
def report(self):
"""Print a report about the transaction"""
t = time.ctime(self.begin)
if hasattr(self, "vote"):
d_vote = self.vote - self.begin
else:
d_vote = "*"
if hasattr(self, "finish"):
d_finish = self.finish - self.begin
else:
d_finish = "*"
print self.fmt % (time.ctime(self.begin), d_vote, d_finish,
self.user, self.url)
class TransactionParser:
def __init__(self):
self.txns = {}
self.skipped = 0
def parse(self, line):
t, m = parse_line(line)
if t is None:
return
name = m[0]
meth = getattr(self, name, None)
if meth is not None:
meth(t, m[1])
def tpc_begin(self, time, args):
t = TStats()
t.begin = time
t.user = args[1]
t.url = args[2]
t.objects = []
tid = eval(args[0])
self.txns[tid] = t
def get_txn(self, args):
tid = eval(args[0])
try:
return self.txns[tid]
except KeyError:
print "uknown tid", repr(tid)
return None
def tpc_finish(self, time, args):
t = self.get_txn(args)
if t is None:
return
t.finish = time
def vote(self, time, args):
t = self.get_txn(args)
if t is None:
return
t.vote = time
def get_txns(self):
L = [(t.id, t) for t in self.txns.values()]
L.sort()
return [t for (id, t) in L]
if __name__ == "__main__":
import fileinput
p = TransactionParser()
i = 0
for line in fileinput.input():
i += 1
try:
p.parse(line)
except:
print "line", i
raise
print "Transaction: %d" % len(p.txns)
print TStats.hdr
for txn in p.get_txns():
txn.report()
#!/usr/bin/env python2.3
# repozo.py -- incremental and full backups of a Data.fs file.
#
# Originally written by Anthony Baxter
# Significantly modified by Barry Warsaw
"""repozo.py -- incremental and full backups of a Data.fs file.
Usage: %(program)s [options]
Where:
Exactly one of -B or -R must be specified:
-B / --backup
Backup current ZODB file.
-R / --recover
Restore a ZODB file from a backup.
-v / --verbose
Verbose mode.
-h / --help
Print this text and exit.
-r dir
--repository=dir
Repository directory containing the backup files. This argument
is required.
Options for -B/--backup:
-f file
--file=file
Source Data.fs file. This argument is required.
-F / --full
Force a full backup. By default, an incremental backup is made
if possible; if a pack has occurred since the last incremental
backup, a full backup is necessary.
-Q / --quick
Verify via md5 checksum only the last incremental written. This
significantly reduces the disk i/o at the (theoretical) cost of
inconsistency. This is a probabilistic way of determining whether
a full backup is necessary.
-z / --gzip
Compress with gzip the backup files. Uses the default zlib
compression level. By default, gzip compression is not used.
Options for -R/--recover:
-D str
--date=str
Recover state as of this date. str is in the format
yyyy-mm-dd[-hh[-mm]]
By default, current time is used.
-o filename
--output=filename
Write recovered ZODB to given file. By default, the file is
written to stdout.
"""
import os
import sys
import md5
import gzip
import time
import errno
import getopt
from ZODB.FileStorage import FileStorage
program = sys.argv[0]
try:
True, False
except NameError:
True = 1
False = 0
BACKUP = 1
RECOVER = 2
COMMASPACE = ', '
READCHUNK = 16 * 1024
VERBOSE = False
def usage(code, msg=''):
outfp = sys.stderr
if code == 0:
outfp = sys.stdout
print >> outfp, __doc__ % globals()
if msg:
print >> outfp, msg
sys.exit(code)
def log(msg, *args):
if VERBOSE:
# Use stderr here so that -v flag works with -R and no -o
print >> sys.stderr, msg % args
def parseargs():
global VERBOSE
try:
opts, args = getopt.getopt(sys.argv[1:], 'BRvhf:r:FD:o:Qz',
['backup', 'recover', 'verbose', 'help',
'file=', 'repository=', 'full', 'date=',
'output=', 'quick', 'gzip'])
except getopt.error, msg:
usage(1, msg)
class Options:
mode = None # BACKUP or RECOVER
file = None # name of input Data.fs file
repository = None # name of directory holding backups
full = False # True forces full backup
date = None # -D argument, if any
output = None # where to write recovered data; None = stdout
quick = False # -Q flag state
gzip = False # -z flag state
options = Options()
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt in ('-v', '--verbose'):
VERBOSE = True
elif opt in ('-R', '--recover'):
if options.mode is not None:
usage(1, '-B and -R are mutually exclusive')
options.mode = RECOVER
elif opt in ('-B', '--backup'):
if options.mode is not None:
usage(1, '-B and -R are mutually exclusive')
options.mode = BACKUP
elif opt in ('-Q', '--quick'):
options.quick = True
elif opt in ('-f', '--file'):
options.file = arg
elif opt in ('-r', '--repository'):
options.repository = arg
elif opt in ('-F', '--full'):
options.full = True
elif opt in ('-D', '--date'):
options.date = arg
elif opt in ('-o', '--output'):
options.output = arg
elif opt in ('-z', '--gzip'):
options.gzip = True
else:
assert False, (opt, arg)
# Any other arguments are invalid
if args:
usage(1, 'Invalid arguments: ' + COMMASPACE.join(args))
# Sanity checks
if options.mode is None:
usage(1, 'Either --backup or --recover is required')
if options.repository is None:
usage(1, '--repository is required')
if options.mode == BACKUP:
if options.date is not None:
log('--date option is ignored in backup mode')
options.date = None
if options.output is not None:
log('--output option is ignored in backup mode')
options.output = None
else:
assert options.mode == RECOVER
if options.file is not None:
log('--file option is ignored in recover mode')
options.file = None
return options
# Read bytes (no more than n, or to EOF if n is None) in chunks from the
# current position in file fp. Pass each chunk as an argument to func().
# Return the total number of bytes read == the total number of bytes
# passed in all to func(). Leaves the file position just after the
# last byte read.
def dofile(func, fp, n=None):
bytesread = 0L
while n is None or n > 0:
if n is None:
todo = READCHUNK
else:
todo = min(READCHUNK, n)
data = fp.read(todo)
if not data:
break
func(data)
nread = len(data)
bytesread += nread
if n is not None:
n -= nread
return bytesread
def checksum(fp, n):
# Checksum the first n bytes of the specified file
sum = md5.new()
def func(data):
sum.update(data)
dofile(func, fp, n)
return sum.hexdigest()
def copyfile(options, dst, start, n):
# Copy bytes from file src, to file dst, starting at offset start, for n
# length of bytes
sum = md5.new()
ifp = open(options.file, 'rb')
ifp.seek(start)
if options.gzip:
ofp = gzip.open(dst, 'wb')
else:
ofp = open(dst, 'wb')
def func(data):
sum.update(data)
ofp.write(data)
ndone = dofile(func, ifp, n)
ofp.close()
ifp.close()
assert ndone == n
return sum.hexdigest()
def concat(files, ofp=None):
# Concatenate a bunch of files from the repository, output to `outfile' if
# given. Return the number of bytes written and the md5 checksum of the
# bytes.
sum = md5.new()
def func(data):
sum.update(data)
if ofp:
ofp.write(data)
bytesread = 0
for f in files:
# Auto uncompress
if f.endswith('fsz'):
ifp = gzip.open(f, 'rb')
else:
ifp = open(f, 'rb')
bytesread += dofile(func, ifp)
ifp.close()
if ofp:
ofp.close()
return bytesread, sum.hexdigest()
def gen_filename(options, ext=None):
if ext is None:
if options.full:
ext = '.fs'
else:
ext = '.deltafs'
if options.gzip:
ext += 'z'
t = time.gmtime()[:6] + (ext,)
return '%04d-%02d-%02d-%02d-%02d-%02d%s' % t
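# For example, a full gzipped backup written at 2004-05-02 10:30:00 UTC
# (a hypothetical timestamp) would be named 2004-05-02-10-30-00.fsz.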
def find_files(options):
def rootcmp(x, y):
# This already compares in reverse order
return cmp(os.path.splitext(y)[0], os.path.splitext(x)[0])
# Return a list of files needed to reproduce state at time `when'
when = options.date
if not when:
when = gen_filename(options, '')
log('looking for files b/w last full backup and %s...', when)
all = os.listdir(options.repository)
all.sort(rootcmp)
# Find the last full backup before date, then include all the incrementals
# between when and that full backup.
needed = []
for file in all:
root, ext = os.path.splitext(file)
if root <= when:
needed.append(file)
if ext in ('.fs', '.fsz'):
break
# Make the file names relative to the repository directory
needed = [os.path.join(options.repository, f) for f in needed]
# Restore back to chronological order
needed.reverse()
if needed:
log('files needed to recover state as of %s:', when)
for f in needed:
log('\t%s', f)
else:
log('no files found')
return needed
# Scan the .dat file corresponding to the last full backup performed.
# Return
#
# filename, startpos, endpos, checksum
#
# of the last incremental. If there is no .dat file, or the .dat file
# is empty, return
#
# None, None, None, None
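# Each .dat line, as written by do_full_backup()/do_incremental_backup(),
# looks like this (values are made up):
#   /backups/2004-05-02-10-30-00.fs 0 12345678 0123456789abcdef0123456789abcdef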
def scandat(repofiles):
fullfile = repofiles[0]
datfile = os.path.splitext(fullfile)[0] + '.dat'
fn = startpos = endpos = sum = None # assume .dat file missing or empty
try:
fp = open(datfile)
except IOError, e:
if e.errno <> errno.ENOENT:
raise
else:
# We only care about the last one.
lines = fp.readlines()
fp.close()
if lines:
fn, startpos, endpos, sum = lines[-1].split()
startpos = long(startpos)
endpos = long(endpos)
return fn, startpos, endpos, sum
def do_full_backup(options):
# Find the file position of the last completed transaction.
fs = FileStorage(options.file, read_only=True)
# Note that the FileStorage ctor calls read_index() which scans the file
# and returns "the position just after the last valid transaction record".
# getSize() then returns this position, which is exactly what we want,
# because we only want to copy stuff from the beginning of the file to the
# last valid transaction record.
pos = fs.getSize()
fs.close()
options.full = True
dest = os.path.join(options.repository, gen_filename(options))
if os.path.exists(dest):
print >> sys.stderr, 'Cannot overwrite existing file:', dest
sys.exit(2)
log('writing full backup: %s bytes to %s', pos, dest)
sum = copyfile(options, dest, 0, pos)
# Write the data file for this full backup
datfile = os.path.splitext(dest)[0] + '.dat'
fp = open(datfile, 'w')
print >> fp, dest, 0, pos, sum
fp.close()
def do_incremental_backup(options, reposz, repofiles):
# Find the file position of the last completed transaction.
fs = FileStorage(options.file, read_only=True)
# Note that the FileStorage ctor calls read_index() which scans the file
# and returns "the position just after the last valid transaction record".
# getSize() then returns this position, which is exactly what we want,
# because we only want to copy stuff from the beginning of the file to the
# last valid transaction record.
pos = fs.getSize()
fs.close()
options.full = False
dest = os.path.join(options.repository, gen_filename(options))
if os.path.exists(dest):
print >> sys.stderr, 'Cannot overwrite existing file:', dest
sys.exit(2)
log('writing incremental: %s bytes to %s', pos-reposz, dest)
sum = copyfile(options, dest, reposz, pos - reposz)
# The first file in repofiles points to the last full backup. Use this to
# get the .dat file and append the information for this incremental to
# that file.
fullfile = repofiles[0]
datfile = os.path.splitext(fullfile)[0] + '.dat'
# This .dat file better exist. Let the exception percolate if not.
fp = open(datfile, 'a')
print >> fp, dest, reposz, pos, sum
fp.close()
def do_backup(options):
repofiles = find_files(options)
# See if we need to do a full backup
if options.full or not repofiles:
log('doing a full backup')
do_full_backup(options)
return
srcsz = os.path.getsize(options.file)
if options.quick:
fn, startpos, endpos, sum = scandat(repofiles)
# If the .dat file was missing, or was empty, do a full backup
if (fn, startpos, endpos, sum) == (None, None, None, None):
log('missing or empty .dat file (full backup)')
do_full_backup(options)
return
# Has the file shrunk, possibly because of a pack?
if srcsz < endpos:
log('file shrunk, possibly because of a pack (full backup)')
do_full_backup(options)
return
# Now check the md5 sum of the source file, from the last
# incremental's start and stop positions.
srcfp = open(options.file, 'rb')
srcfp.seek(startpos)
srcsum = checksum(srcfp, endpos-startpos)
srcfp.close()
log('last incremental file: %s', fn)
log('last incremental checksum: %s', sum)
log('source checksum range: [%s..%s], sum: %s',
startpos, endpos, srcsum)
if sum == srcsum:
if srcsz == endpos:
log('No changes, nothing to do')
return
log('doing incremental, starting at: %s', endpos)
do_incremental_backup(options, endpos, repofiles)
return
else:
# This is much slower, and more disk i/o intensive, but it's also
# more accurate since it checks the actual existing files instead of
# the information in the .dat file.
#
# See if we can do an incremental, based on the files that already
# exist. This call of concat() will not write an output file.
reposz, reposum = concat(repofiles)
log('repository state: %s bytes, md5: %s', reposz, reposum)
# Get the md5 checksum of the source file, up to two file positions:
# the entire size of the file, and up to the file position of the last
# incremental backup.
srcfp = open(options.file, 'rb')
srcsum = checksum(srcfp, srcsz)
srcfp.seek(0)
srcsum_backedup = checksum(srcfp, reposz)
srcfp.close()
log('current state : %s bytes, md5: %s', srcsz, srcsum)
log('backed up state : %s bytes, md5: %s', reposz, srcsum_backedup)
# Has nothing changed?
if srcsz == reposz and srcsum == reposum:
log('No changes, nothing to do')
return
# Has the file shrunk, probably because of a pack?
if srcsz < reposz:
log('file shrunk, possibly because of a pack (full backup)')
do_full_backup(options)
return
# The source file is larger than the repository. If the md5 checksums
# match, then we know we can do an incremental backup. If they don't,
# then perhaps the file was packed at some point (or a
# non-transactional undo was performed, but this is deprecated). Only
# do a full backup if forced to.
#
# XXX For ZODB4, this needs to take into account the storage metadata
# header that FileStorage has grown at the front of the file.
if reposum == srcsum_backedup:
log('doing incremental, starting at: %s', reposz)
do_incremental_backup(options, reposz, repofiles)
return
# The checksums don't match, meaning the front of the source file has
# changed. We'll need to do a full backup in that case.
log('file changed, possibly because of a pack (full backup)')
do_full_backup(options)
def do_recover(options):
# Find the first full backup at or before the specified date
repofiles = find_files(options)
if not repofiles:
if options.date:
log('No files in repository before %s', options.date)
else:
log('No files in repository')
return
if options.output is None:
log('Recovering file to stdout')
outfp = sys.stdout
else:
log('Recovering file to %s', options.output)
outfp = open(options.output, 'wb')
reposz, reposum = concat(repofiles, outfp)
if outfp <> sys.stdout:
outfp.close()
log('Recovered %s bytes, md5: %s', reposz, reposum)
def main():
options = parseargs()
if options.mode == BACKUP:
do_backup(options)
else:
assert options.mode == RECOVER
do_recover(options)
if __name__ == '__main__':
main()
#!/usr/bin/env python2.3
"""Report on the space used by objects in a storage.
usage: space.py [-v] data.fs
The current implementation only supports FileStorage.
Current limitations / simplifications: Ignores revisions and versions.
"""
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata
def run(path, v=0):
fs = FileStorage(path, read_only=1)
# break into the file implementation
if hasattr(fs._index, 'iterkeys'):
iter = fs._index.iterkeys()
else:
iter = fs._index.keys()
totals = {}
for oid in iter:
data, serialno = fs.load(oid, '')
mod, klass = get_pickle_metadata(data)
key = "%s.%s" % (mod, klass)
bytes, count = totals.get(key, (0, 0))
bytes += len(data)
count += 1
totals[key] = bytes, count
if v:
print "%8s %5d %s" % (U64(oid), len(data), key)
L = totals.items()
L.sort(lambda a, b: cmp(a[1], b[1]))
L.reverse()
print "Totals per object class:"
for key, (bytes, count) in L:
print "%8d %8d %s" % (count, bytes, key)
def main():
import sys
import getopt
try:
opts, args = getopt.getopt(sys.argv[1:], "v")
except getopt.error, msg:
print msg
print "usage: space.py [-v] Data.fs"
sys.exit(2)
if len(args) != 1:
print "usage: space.py [-v] Data.fs"
sys.exit(2)
v = 0
for o, a in opts:
if o == "-v":
v += 1
path = args[0]
run(path, v)
if __name__ == "__main__":
main()
"""Verify that fstest.py can find errors.
XXX To run this test script fstest.py must be on your PYTHONPATH.
"""
from cStringIO import StringIO
import os
import re
import struct
import tempfile
import unittest
import fstest
from fstest import FormatError, U64
class TestCorruptedFS(unittest.TestCase):
# XXX path?
f = open('test-checker.fs', 'rb')
datafs = f.read()
f.close()
del f
def setUp(self):
self._temp = tempfile.mktemp()
self._file = open(self._temp, 'wb')
def tearDown(self):
if not self._file.closed:
self._file.close()
if os.path.exists(self._temp):
try:
os.remove(self._temp)
except os.error:
pass
def noError(self):
if not self._file.closed:
self._file.close()
fstest.check(self._temp)
def detectsError(self, rx):
if not self._file.closed:
self._file.close()
try:
fstest.check(self._temp)
except FormatError, msg:
mo = re.search(rx, str(msg))
self.failIf(mo is None, "unexpected error: %s" % msg)
else:
self.fail("fstest did not detect corruption")
def getHeader(self):
buf = self._datafs.read(16)
if not buf:
return 0, ''
tl = U64(buf[8:])
return tl, buf
def copyTransactions(self, n):
"""Copy at most n transactions from the good data"""
f = self._datafs = StringIO(self.datafs)
self._file.write(f.read(4))
for i in range(n):
tl, data = self.getHeader()
if not tl:
return
self._file.write(data)
rec = f.read(tl - 8)
self._file.write(rec)
def testGood(self):
self._file.write(self.datafs)
self.noError()
def testTwoTransactions(self):
self.copyTransactions(2)
self.noError()
def testEmptyFile(self):
self.detectsError("empty file")
def testInvalidHeader(self):
self._file.write('SF12')
self.detectsError("invalid file header")
def testTruncatedTransaction(self):
self._file.write(self.datafs[:4+22])
self.detectsError("truncated")
def testCheckpointFlag(self):
self.copyTransactions(2)
tl, data = self.getHeader()
assert tl > 0, "ran out of good transaction data"
self._file.write(data)
self._file.write('c')
self._file.write(self._datafs.read(tl - 9))
self.detectsError("checkpoint flag")
def testInvalidStatus(self):
self.copyTransactions(2)
tl, data = self.getHeader()
assert tl > 0, "ran out of good transaction data"
self._file.write(data)
self._file.write('Z')
self._file.write(self._datafs.read(tl - 9))
self.detectsError("invalid status")
def testTruncatedRecord(self):
self.copyTransactions(3)
tl, data = self.getHeader()
assert tl > 0, "ran out of good transaction data"
self._file.write(data)
buf = self._datafs.read(tl / 2)
self._file.write(buf)
self.detectsError("truncated possibly")
def testBadLength(self):
self.copyTransactions(2)
tl, data = self.getHeader()
assert tl > 0, "ran out of good transaction data"
self._file.write(data)
buf = self._datafs.read(tl - 8)
self._file.write(buf[0])
assert tl <= 1<<16, "can't use this transaction for this test"
self._file.write("\777\777")
self._file.write(buf[3:])
self.detectsError("invalid transaction header")
def testDecreasingTimestamps(self):
self.copyTransactions(0)
tl, data = self.getHeader()
buf = self._datafs.read(tl - 8)
t1 = data + buf
tl, data = self.getHeader()
buf = self._datafs.read(tl - 8)
t2 = data + buf
self._file.write(t2[:8] + t1[8:])
self._file.write(t1[:8] + t2[8:])
self.detectsError("time-stamp")
def testTruncatedData(self):
# This test must re-write the transaction header length in
# order to trigger the error in check_drec(). If it doesn't,
# the truncated data record would also be caught as a truncated
# transaction record.
self.copyTransactions(1)
tl, data = self.getHeader()
pos = self._file.tell()
self._file.write(data)
buf = self._datafs.read(tl - 8)
hdr = buf[:15]
ul, dl, el = struct.unpack(">HHH", hdr[-6:])
self._file.write(buf[:15 + ul + dl + el])
data = buf[15 + ul + dl + el:]
self._file.write(data[:24])
self._file.seek(pos + 8, 0)
newlen = struct.pack(">II", 0, tl - (len(data) - 24))
self._file.write(newlen)
self.detectsError("truncated at")
def testBadDataLength(self):
self.copyTransactions(1)
tl, data = self.getHeader()
self._file.write(data)
buf = self._datafs.read(tl - 8)
hdr = buf[:7]
# write the transaction meta data
ul, dl, el = struct.unpack(">HHH", hdr[-6:])
self._file.write(buf[:7 + ul + dl + el])
# write the first part of the data header
data = buf[7 + ul + dl + el:]
self._file.write(data[:24])
self._file.write("\000" * 4 + "\077" + "\000" * 3)
self._file.write(data[32:])
self.detectsError("record exceeds transaction")
if __name__ == "__main__":
unittest.main()
# Some simple tests for zeopack.py
# For this to work, zeopack.py must be on your PATH.
from ZODB.FileStorage import FileStorage
from ZODB.tests.StorageTestBase import StorageTestBase
from ZEO.tests import forker
import ZODB
import os
import socket
import tempfile
import threading
import time
import unittest
# XXX The forker interface isn't clearly defined. It's different on
# different branches of ZEO. This will break someday.
# XXX Only handle the Unix variant of the forker. Just to give Tim
# something to do.
class PackerTests(StorageTestBase):
def setUp(self):
self.started = 0
def start(self):
        self.started = 1
self.path = tempfile.mktemp(suffix=".fs")
self._storage = FileStorage(self.path)
self.db = ZODB.DB(self._storage)
self.do_updates()
self.pid, self.exit = forker.start_zeo_server(self._storage, self.addr)
def do_updates(self):
for i in range(100):
self._dostore()
def tearDown(self):
if not self.started:
return
self.db.close()
self._storage.close()
self.exit.close()
try:
os.kill(self.pid, 9)
except os.error:
pass
try:
os.waitpid(self.pid, 0)
except os.error, err:
##print "waitpid failed", err
pass
for ext in '', '.old', '.lock', '.index', '.tmp':
path = self.path + ext
try:
os.remove(path)
except os.error:
pass
def set_inet_addr(self):
self.host = socket.gethostname()
self.port = forker.get_port()
self.addr = self.host, self.port
def testPack(self):
self.set_inet_addr()
self.start()
status = os.system("zeopack.py -h %s -p %s" % (self.host, self.port))
assert status == 0
assert os.path.exists(self.path + ".old")
def testPackDays(self):
self.set_inet_addr()
self.start()
status = os.system("zeopack.py -h %s -p %s -d 1" % (self.host,
self.port))
# Since we specified one day, nothing should get packed
assert status == 0
assert not os.path.exists(self.path + ".old")
def testAF_UNIXPack(self):
self.addr = tempfile.mktemp(suffix=".zeo-socket")
self.start()
status = os.system("zeopack.py -U %s" % self.addr)
assert status == 0
assert os.path.exists(self.path + ".old")
def testNoServer(self):
status = os.system("zeopack.py -p 19")
assert status != 0
def testWaitForServer(self):
self.set_inet_addr()
def delayed_start():
time.sleep(11)
self.start()
t = threading.Thread(target=delayed_start)
t.start()
status = os.system("zeopack.py -h %s -p %s -W" % (self.host,
self.port))
t.join()
assert status == 0
assert os.path.exists(self.path + ".old")
class UpTest(unittest.TestCase):
def testUp(self):
status = os.system("zeoup.py -p 19")
# There is no ZEO server on port 19, so we should see non-zero
# exit status.
assert status != 0
if __name__ == "__main__":
unittest.main()
#!/usr/bin/env python2.3
"""Transaction timeout test script.
This script connects to a storage, begins a transaction, calls store()
and tpc_vote(), and then sleeps for the given delay. This should trigger the
transaction timeout feature of the server.
usage: timeout.py address delay [storage-name]
"""
import sys
import time
from ZODB.Transaction import Transaction
from ZODB.tests.MinPO import MinPO
from ZODB.tests.StorageTestBase import zodb_pickle
from ZEO.ClientStorage import ClientStorage
ZERO = '\0'*8
def main():
if len(sys.argv) not in (3, 4):
sys.stderr.write("Usage: timeout.py address delay [storage-name]\n" %
sys.argv[0])
sys.exit(2)
hostport = sys.argv[1]
delay = float(sys.argv[2])
if sys.argv[3:]:
name = sys.argv[3]
else:
name = "1"
if "/" in hostport:
address = hostport
else:
if ":" in hostport:
i = hostport.index(":")
host, port = hostport[:i], hostport[i+1:]
else:
host, port = "", hostport
port = int(port)
address = (host, port)
print "Connecting to %s..." % repr(address)
storage = ClientStorage(address, name)
print "Connected. Now starting a transaction..."
oid = storage.new_oid()
version = ""
revid = ZERO
data = MinPO("timeout.py")
pickled_data = zodb_pickle(data)
t = Transaction()
t.user = "timeout.py"
storage.tpc_begin(t)
storage.store(oid, revid, pickled_data, version, t)
print "Stored. Now voting..."
storage.tpc_vote(t)
print "Voted; now sleeping %s..." % delay
time.sleep(delay)
print "Done."
if __name__ == "__main__":
main()
#!/usr/bin/env python2.3
"""Connect to a ZEO server and ask it to pack.
Usage: zeopack.py [options]
Options:
-p port -- port to connect to
-h host -- host to connect to (default is current host)
-U path -- Unix-domain socket to connect to
-S name -- storage name (default is '1')
-d days -- pack objects more than days old
-1 -- Connect to a ZEO 1 server
-W -- wait for server to come up. Normally the script tries to
connect for 10 seconds, then exits with an error. The -W
option is only supported with ZEO 1.
You must specify either -p and -h or -U.
"""
import getopt
import socket
import sys
import time
from ZEO.ClientStorage import ClientStorage
WAIT = 10 # wait no more than 10 seconds for client to connect
def connect(storage):
# The connect-on-startup logic that ZEO provides isn't too useful
    # for this script.  We'd like the client to attempt to start up, but
# fail if it can't get through to the server after a reasonable
# amount of time. There's no external support for this, so we'll
# expose the ZEO 1.0 internals. (consenting adults only)
t0 = time.time()
while t0 + WAIT > time.time():
storage._call.connect()
if storage._connected:
return
raise RuntimeError, "Unable to connect to ZEO server"
def pack1(addr, storage, days, wait):
cs = ClientStorage(addr, storage=storage,
wait_for_server_on_startup=wait)
if wait:
# _startup() is an artifact of the way ZEO 1.0 works. The
# ClientStorage doesn't get fully initialized until registerDB()
# is called. The only thing we care about, though, is that
# registerDB() calls _startup().
cs._startup()
else:
connect(cs)
cs.invalidator = None
cs.pack(wait=1, days=days)
cs.close()
def pack2(addr, storage, days):
cs = ClientStorage(addr, storage=storage, wait=1, read_only=1)
cs.pack(wait=1, days=days)
cs.close()
def usage(exit=1):
print __doc__
print " ".join(sys.argv)
sys.exit(exit)
def main():
host = None
port = None
unix = None
storage = '1'
days = 0
wait = 0
zeoversion = 2
try:
opts, args = getopt.getopt(sys.argv[1:], 'p:h:U:S:d:W1')
for o, a in opts:
if o == '-p':
port = int(a)
elif o == '-h':
host = a
elif o == '-U':
unix = a
elif o == '-S':
storage = a
elif o == '-d':
days = int(a)
elif o == '-W':
wait = 1
elif o == '-1':
zeoversion = 1
except Exception, err:
print err
usage()
if unix is not None:
addr = unix
else:
if host is None:
host = socket.gethostname()
if port is None:
usage()
addr = host, port
if zeoversion == 1:
pack1(addr, storage, days, wait)
else:
pack2(addr, storage, days)
if __name__ == "__main__":
try:
main()
except Exception, err:
print err
sys.exit(1)
#!/usr/bin/env python2.3
"""Report on the number of currently waiting clients in the ZEO queue.
Usage: %(PROGRAM)s [options] logfile
Options:
-h / --help
Print this help text and exit.
-v / --verbose
Verbose output
-f file
--file file
Use the specified file to store the incremental state as a pickle. If
not given, %(STATEFILE)s is used.
-r / --reset
Reset the state of the tool. This blows away any existing state
pickle file and then exits -- it does not parse the file. Use this
when you rotate log files so that the next run will parse from the
beginning of the file.
"""
import os
import re
import sys
import time
import errno
import getopt
import cPickle as pickle
COMMASPACE = ', '
STATEFILE = 'zeoqueue.pck'
PROGRAM = sys.argv[0]
try:
True, False
except NameError:
True = 1
False = 0
tcre = re.compile(r"""
(?P<ymd>
\d{4}- # year
\d{2}- # month
\d{2}) # day
T # separator
(?P<hms>
\d{2}: # hour
\d{2}: # minute
\d{2}) # second
""", re.VERBOSE)
ccre = re.compile(r"""
zrpc-conn:(?P<addr>\d+.\d+.\d+.\d+:\d+)\s+
calling\s+
(?P<method>
\w+) # the method
\( # args open paren
\' # string quote start
(?P<tid>
\S+) # first argument -- usually the tid
\' # end of string
(?P<rest>
.*) # rest of line
""", re.VERBOSE)
wcre = re.compile(r'Clients waiting: (?P<num>\d+)')
def parse_time(line):
"""Return the time portion of a zLOG line in seconds or None."""
mo = tcre.match(line)
if mo is None:
return None
date, time_ = mo.group('ymd', 'hms')
date_l = [int(elt) for elt in date.split('-')]
time_l = [int(elt) for elt in time_.split(':')]
return int(time.mktime(date_l + time_l + [0, 0, 0]))
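# Example (hypothetical BLATHER line, for illustration): parse_time() converts
# the leading timestamp to seconds, and ccre picks out the client address, the
# method name, and the tid argument.
def _parse_example():
    sample = ("2002-12-16T06:16:06 BLATHER(-100) zrpc-conn:10.0.26.54:48983 "
              "calling tpc_begin('\\x03I\\x90((\\xdbp\\xd5', '', 'QueueCatalog')")
    return parse_time(sample), ccre.search(sample).group('addr', 'method', 'tid')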
class Txn:
"""Track status of single transaction."""
def __init__(self, tid):
self.tid = tid
self.hint = None
self.begin = None
self.vote = None
self.abort = None
self.finish = None
self.voters = []
def isactive(self):
if self.begin and not (self.abort or self.finish):
return True
else:
return False
class Status:
"""Track status of ZEO server by replaying log records.
We want to keep track of several events:
- The last committed transaction.
- The last committed or aborted transaction.
- The last transaction that got the lock but didn't finish.
- The client address doing the first vote of a transaction.
- The number of currently active transactions.
- The number of reported queued transactions.
- Client restarts.
- Number of current connections. XXX (This might not be useful.)
We can observe these events by reading the following sorts of log
entries:
2002-12-16T06:16:05 BLATHER(-100) zrpc:12649 calling
tpc_begin('\x03I\x90((\xdbp\xd5', '', 'QueueCatal...
2002-12-16T06:16:06 BLATHER(-100) zrpc:12649 calling
vote('\x03I\x90((\xdbp\xd5')
2002-12-16T06:16:06 BLATHER(-100) zrpc:12649 calling
tpc_finish('\x03I\x90((\xdbp\xd5')
2002-12-16T10:46:10 INFO(0) ZSS:12649:1 Transaction blocked waiting
for storage. Clients waiting: 1.
2002-12-16T06:15:57 BLATHER(-100) zrpc:12649 connect from
('10.0.26.54', 48983): <ManagedServerConnection ('10.0.26.54', 48983)>
2002-12-16T10:30:09 INFO(0) ZSS:12649:1 disconnected
"""
def __init__(self):
self.lineno = 0
self.pos = 0
self.reset()
def reset(self):
self.commit = None
self.commit_or_abort = None
self.last_unfinished = None
self.n_active = 0
self.n_blocked = 0
self.n_conns = 0
self.t_restart = None
self.txns = {}
def iscomplete(self):
# The status report will always be complete if we encounter an
# explicit restart.
if self.t_restart is not None:
return True
# If we haven't seen a restart, assume that seeing a finished
# transaction is good enough.
return self.commit is not None
def process_file(self, fp):
if self.pos:
if VERBOSE:
print 'seeking to file position', self.pos
fp.seek(self.pos)
while True:
line = fp.readline()
if not line:
break
self.lineno += 1
self.process(line)
self.pos = fp.tell()
def process(self, line):
if line.find("calling") != -1:
self.process_call(line)
elif line.find("connect") != -1:
self.process_connect(line)
# test for "locked" because word may start with "B" or "b"
elif line.find("locked") != -1:
self.process_block(line)
elif line.find("Starting") != -1:
self.process_start(line)
def process_call(self, line):
mo = ccre.search(line)
if mo is None:
return
called_method = mo.group('method')
# XXX exit earlier if we've got zeoLoad, because it's the most
# frequently called method and we don't use it.
if called_method == "zeoLoad":
return
t = parse_time(line)
meth = getattr(self, "call_%s" % called_method, None)
if meth is None:
return
client = mo.group('addr')
tid = mo.group('tid')
rest = mo.group('rest')
meth(t, client, tid, rest)
def process_connect(self, line):
pass
def process_block(self, line):
mo = wcre.search(line)
if mo is None:
# assume that this was a restart message for the last blocked
# transaction.
self.n_blocked = 0
else:
self.n_blocked = int(mo.group('num'))
def process_start(self, line):
if line.find("Starting ZEO server") != -1:
self.reset()
self.t_restart = parse_time(line)
def call_tpc_begin(self, t, client, tid, rest):
txn = Txn(tid)
txn.begin = t
if rest[0] == ',':
i = 1
while rest[i].isspace():
i += 1
rest = rest[i:]
txn.hint = rest
self.txns[tid] = txn
self.n_active += 1
self.last_unfinished = txn
def call_vote(self, t, client, tid, rest):
txn = self.txns.get(tid)
if txn is None:
print "Oops!"
txn = self.txns[tid] = Txn(tid)
txn.vote = t
txn.voters.append(client)
def call_tpc_abort(self, t, client, tid, rest):
txn = self.txns.get(tid)
if txn is None:
print "Oops!"
txn = self.txns[tid] = Txn(tid)
txn.abort = t
txn.voters = []
self.n_active -= 1
if self.commit_or_abort:
# delete the old transaction
try:
del self.txns[self.commit_or_abort.tid]
except KeyError:
pass
self.commit_or_abort = txn
def call_tpc_finish(self, t, client, tid, rest):
txn = self.txns.get(tid)
if txn is None:
print "Oops!"
txn = self.txns[tid] = Txn(tid)
txn.finish = t
txn.voters = []
self.n_active -= 1
if self.commit:
# delete the old transaction
try:
del self.txns[self.commit.tid]
except KeyError:
pass
if self.commit_or_abort:
# delete the old transaction
try:
del self.txns[self.commit_or_abort.tid]
except KeyError:
pass
self.commit = self.commit_or_abort = txn
def report(self):
print "Blocked transactions:", self.n_blocked
if not VERBOSE:
return
if self.t_restart:
print "Server started:", time.ctime(self.t_restart)
if self.commit is not None:
t = self.commit_or_abort.finish
if t is None:
t = self.commit_or_abort.abort
print "Last finished transaction:", time.ctime(t)
# the blocked transaction should be the first one that calls vote
L = [(txn.begin, txn) for txn in self.txns.values()]
L.sort()
for x, txn in L:
if txn.isactive():
began = txn.begin
if txn.voters:
print "Blocked client (first vote):", txn.voters[0]
print "Blocked transaction began at:", time.ctime(began)
print "Hint:", txn.hint
print "Idle time: %d sec" % int(time.time() - began)
break
def usage(code, msg=''):
print >> sys.stderr, __doc__ % globals()
if msg:
print >> sys.stderr, msg
sys.exit(code)
def main():
global VERBOSE
VERBOSE = 0
file = STATEFILE
reset = False
# -0 is a secret option used for testing purposes only
seek = True
try:
opts, args = getopt.getopt(sys.argv[1:], 'vhf:r0',
['help', 'verbose', 'file=', 'reset'])
except getopt.error, msg:
usage(1, msg)
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt in ('-v', '--verbose'):
VERBOSE += 1
elif opt in ('-f', '--file'):
file = arg
elif opt in ('-r', '--reset'):
reset = True
elif opt == '-0':
seek = False
if reset:
# Blow away the existing state file and exit
try:
os.unlink(file)
if VERBOSE:
print 'removing pickle state file', file
except OSError, e:
if e.errno <> errno.ENOENT:
raise
return
if not args:
usage(1, 'logfile is required')
if len(args) > 1:
usage(1, 'too many arguments: %s' % COMMASPACE.join(args))
path = args[0]
# Get the previous status object from the pickle file, if it is available
# and if the --reset flag wasn't given.
status = None
try:
statefp = open(file, 'rb')
try:
status = pickle.load(statefp)
if VERBOSE:
print 'reading status from file', file
finally:
statefp.close()
except IOError, e:
if e.errno <> errno.ENOENT:
raise
if status is None:
status = Status()
if VERBOSE:
print 'using new status'
if not seek:
status.pos = 0
fp = open(path, 'rb')
try:
status.process_file(fp)
finally:
fp.close()
# Save state
statefp = open(file, 'wb')
pickle.dump(status, statefp, 1)
statefp.close()
# Print the report and return the number of blocked clients in the exit
# status code.
status.report()
sys.exit(status.n_blocked)
if __name__ == "__main__":
main()
#!/usr/bin/env python2.3
"""Parse the BLATHER logging generated by ZEO, and optionally replay it.
Usage: zeointervals.py [options]
Options:
--help / -h
Print this message and exit.
--replay=storage
-r storage
Replay the parsed transactions through the new storage
--maxtxn=count
-m count
Parse no more than count transactions.
--report / -p
Print a report as we're parsing.
Unlike parsezeolog.py, this script generates timestamps for each transaction
and for each sub-command within the transaction. We can use this to compare timings with
synthesized data.
"""
import re
import sys
import time
import getopt
import operator
# ZEO logs measure wall-clock time so for consistency we need to do the same
#from time import clock as now
from time import time as now
from ZODB.FileStorage import FileStorage
#from BDBStorage.BDBFullStorage import BDBFullStorage
#from Standby.primary import PrimaryStorage
#from Standby.config import RS_PORT
from ZODB.Transaction import Transaction
from ZODB.utils import p64
datecre = re.compile('(\d\d\d\d-\d\d-\d\d)T(\d\d:\d\d:\d\d)')
methcre = re.compile("ZEO Server (\w+)\((.*)\) \('(.*)', (\d+)")
class StopParsing(Exception):
pass
def usage(code, msg=''):
print __doc__
if msg:
print msg
sys.exit(code)
def parse_time(line):
"""Return the time portion of a zLOG line in seconds or None."""
mo = datecre.match(line)
if mo is None:
return None
date, time_ = mo.group(1, 2)
date_l = [int(elt) for elt in date.split('-')]
time_l = [int(elt) for elt in time_.split(':')]
return int(time.mktime(date_l + time_l + [0, 0, 0]))
def parse_line(line):
"""Parse a log entry and return time, method info, and client."""
t = parse_time(line)
if t is None:
return None, None, None
mo = methcre.search(line)
if mo is None:
return None, None, None
meth_name = mo.group(1)
meth_args = mo.group(2)
meth_args = [s.strip() for s in meth_args.split(',')]
m = meth_name, tuple(meth_args)
c = mo.group(3), mo.group(4)
return t, m, c
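# Example (hypothetical BLATHER line, for illustration): parse_line() returns
# the timestamp in seconds, a (method, args) pair, and the (host, port) of the
# calling client.
def _parse_line_example():
    sample = ("2002-12-16T06:16:06 BLATHER(-100) ZEO Server "
              "storea(2345, [512]) ('10.0.26.54', 48983) ")
    return parse_line(sample)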
class StoreStat:
def __init__(self, when, oid, size):
self.when = when
self.oid = oid
self.size = size
# Crufty
def __getitem__(self, i):
if i == 0: return self.oid
if i == 1: return self.size
raise IndexError
class TxnStat:
def __init__(self):
self._begintime = None
self._finishtime = None
self._aborttime = None
self._url = None
self._objects = []
def tpc_begin(self, when, args, client):
self._begintime = when
# args are txnid, user, description (looks like it's always a url)
self._url = args[2]
def storea(self, when, args, client):
oid = int(args[0])
# args[1] is "[numbytes]"
size = int(args[1][1:-1])
s = StoreStat(when, oid, size)
self._objects.append(s)
def tpc_abort(self, when):
self._aborttime = when
def tpc_finish(self, when):
self._finishtime = when
# Mapping oid -> revid
_revids = {}
class ReplayTxn(TxnStat):
def __init__(self, storage):
self._storage = storage
self._replaydelta = 0
TxnStat.__init__(self)
def replay(self):
ZERO = '\0'*8
t0 = now()
t = Transaction()
self._storage.tpc_begin(t)
for obj in self._objects:
oid = obj.oid
revid = _revids.get(oid, ZERO)
# BAW: simulate a pickle of the given size
data = 'x' * obj.size
# BAW: ignore versions for now
newrevid = self._storage.store(p64(oid), revid, data, '', t)
_revids[oid] = newrevid
if self._aborttime:
self._storage.tpc_abort(t)
origdelta = self._aborttime - self._begintime
else:
self._storage.tpc_vote(t)
self._storage.tpc_finish(t)
origdelta = self._finishtime - self._begintime
t1 = now()
        # Shows how many seconds slower (positive) or faster (negative) our
        # local replay was compared with the original transaction
self._replaydelta = t1 - t0 - origdelta
class ZEOParser:
def __init__(self, maxtxns=-1, report=1, storage=None):
self.__txns = []
self.__curtxn = {}
self.__skipped = 0
self.__maxtxns = maxtxns
self.__finishedtxns = 0
self.__report = report
self.__storage = storage
def parse(self, line):
t, m, c = parse_line(line)
if t is None:
# Skip this line
return
name = m[0]
meth = getattr(self, name, None)
if meth is not None:
meth(t, m[1], c)
def tpc_begin(self, when, args, client):
txn = ReplayTxn(self.__storage)
self.__curtxn[client] = txn
meth = getattr(txn, 'tpc_begin', None)
if meth is not None:
meth(when, args, client)
def storea(self, when, args, client):
txn = self.__curtxn.get(client)
if txn is None:
self.__skipped += 1
return
meth = getattr(txn, 'storea', None)
if meth is not None:
meth(when, args, client)
def tpc_finish(self, when, args, client):
txn = self.__curtxn.get(client)
if txn is None:
self.__skipped += 1
return
meth = getattr(txn, 'tpc_finish', None)
if meth is not None:
meth(when)
if self.__report:
self.report(txn)
self.__txns.append(txn)
self.__curtxn[client] = None
self.__finishedtxns += 1
if self.__maxtxns > 0 and self.__finishedtxns >= self.__maxtxns:
raise StopParsing
def report(self, txn):
"""Print a report about the transaction"""
if txn._objects:
bytes = reduce(operator.add, [size for oid, size in txn._objects])
else:
bytes = 0
print '%s %s %4d %10d %s %s' % (
txn._begintime, txn._finishtime - txn._begintime,
len(txn._objects),
bytes,
time.ctime(txn._begintime),
txn._url)
def replay(self):
for txn in self.__txns:
txn.replay()
# How many fell behind?
slower = []
faster = []
for txn in self.__txns:
if txn._replaydelta > 0:
slower.append(txn)
else:
faster.append(txn)
print len(slower), 'laggards,', len(faster), 'on-time or faster'
# Find some averages
if slower:
sum = reduce(operator.add,
[txn._replaydelta for txn in slower], 0)
print 'average slower txn was:', float(sum) / len(slower)
if faster:
sum = reduce(operator.add,
[txn._replaydelta for txn in faster], 0)
print 'average faster txn was:', float(sum) / len(faster)
def main():
try:
opts, args = getopt.getopt(
sys.argv[1:],
'hr:pm:',
['help', 'replay=', 'report', 'maxtxns='])
except getopt.error, e:
usage(1, e)
if args:
usage(1)
replay = 0
maxtxns = -1
report = 0
storagefile = None
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt in ('-r', '--replay'):
replay = 1
storagefile = arg
elif opt in ('-p', '--report'):
report = 1
elif opt in ('-m', '--maxtxns'):
try:
maxtxns = int(arg)
except ValueError:
usage(1, 'Bad -m argument: %s' % arg)
if replay:
storage = FileStorage(storagefile)
#storage = BDBFullStorage(storagefile)
#storage = PrimaryStorage('yyz', storage, RS_PORT)
t0 = now()
p = ZEOParser(maxtxns, report, storage)
i = 0
while 1:
line = sys.stdin.readline()
if not line:
break
i += 1
try:
p.parse(line)
except StopParsing:
break
except:
print 'input file line:', i
raise
t1 = now()
print 'total parse time:', t1-t0
t2 = now()
if replay:
p.replay()
t3 = now()
print 'total replay time:', t3-t2
print 'total time:', t3-t0
if __name__ == '__main__':
main()
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Tools for analyzing ZEO Server logs.
This script contains a number of commands, implemented by command
functions. To run a command, give the command name and its arguments
as arguments to this script.
Commands:
blocked_times file threshold
Output a summary of episodes where transactions were blocked
when the episode lasted at least threshold seconds.
The file may be a file name or - to read from standard input.
The file may also be a command:
script blocked_times 'bunzip2 <foo.log.bz2' 60
If the file is a command, it must contain at least a single
space.
The columns of output are:
- The time the episode started
- The seconds from the start of the episode until the blocking
transaction finished.
- The client id (host and port) of the blocking transaction.
- The seconds from the start of the episode until the end of the
episode.
time_calls file threshold
Time how long calls took. Note that this is normally combined
with grep to time just a particular kind of call:
script time_calls 'bunzip2 <foo.log.bz2 | grep tpc_finish' 10
The columns of output are:
- The time of the call invocation
- The seconds from the call to the return
- The client that made the call.
time_trans file threshold
Output a summary of transactions that held the global transaction
lock for at least threshold seconds. (This is the time from when
voting starts until the transaction is completed by the server.)
The columns of output are:
- time that the vote started.
- client id
- number of objects written / number of objects updated
- seconds from tpc_begin to vote start
- seconds spent voting
- vote status: n=normal, d=delayed, e=error
- seconds waiting between vote return and finish call
- time spent finishing or 'abort' if the transaction aborted
minute file
Compute production statistics by minute
The columns of output are:
- date/time
- Number of active clients
- number of reads
- number of stores
- number of commits (finish)
- number of aborts
- number of transactions (commits + aborts)
Summary statistics are printed at the end
minutes file
Show just the summary statistics for production by minute.
hour file
Compute production statistics by hour
hours file
Show just the summary statistics for production by hour.
day file
Compute production statistics by day
days file
Show just the summary statistics for production by day.
verify file
Compute verification statistics
The columns of output are:
- client id
- verification start time
- number of objects verified
- wall time to verify
- average milliseconds to verify per object.
$Id: zeoserverlog.py,v 1.5 2004/03/18 13:27:49 yuppie Exp $
"""
import datetime, sys, re, os
def time(line):
d = line[:10]
t = line[11:19]
y, mo, d = map(int, d.split('-'))
h, mi, s = map(int, t.split(':'))
return datetime.datetime(y, mo, d, h, mi, s)
def sub(t1, t2):
delta = t2 - t1
return delta.days*86400.0+delta.seconds+delta.microseconds/1000000.0
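# Example (hypothetical log lines): time() parses the leading timestamp of a
# zLOG line into a datetime, and sub() returns the seconds elapsed between two.
def _elapsed_example():
    t1 = time('2002-12-16T06:16:05 INFO(0) ZSS:12649:1 example entry')
    t2 = time('2002-12-16T06:16:07 INFO(0) ZSS:12649:1 example entry')
    return sub(t1, t2)  # 2.0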
waitre = re.compile(r'Clients waiting: (\d+)')
idre = re.compile(r' ZSS:\d+/(\d+.\d+.\d+.\d+:\d+) ')
def blocked_times(args):
f, thresh = args
t1 = t2 = cid = blocking = waiting = 0
last_blocking = False
thresh = int(thresh)
for line in xopen(f):
line = line.strip()
if line.endswith('Blocked transaction restarted.'):
blocking = False
waiting = 0
else:
s = waitre.search(line)
if not s:
continue
waiting = int(s.group(1))
blocking = line.find(
'Transaction blocked waiting for storage') >= 0
if blocking and waiting == 1:
t1 = time(line)
t2 = t1
if not blocking and last_blocking:
last_wait = 0
t2 = time(line)
cid = idre.search(line).group(1)
if waiting == 0:
d = sub(t1, time(line))
if d >= thresh:
print t1, sub(t1, t2), cid, d
t1 = t2 = cid = blocking = waiting = last_wait = max_wait = 0
last_blocking = blocking
connidre = re.compile(r' zrpc-conn:(\d+.\d+.\d+.\d+:\d+) ')
def time_calls(f):
f, thresh = f
if f == '-':
f = sys.stdin
else:
f = xopen(f)
thresh = float(thresh)
t1 = None
maxd = 0
for line in f:
line = line.strip()
if ' calling ' in line:
t1 = time(line)
elif ' returns ' in line and t1 is not None:
d = sub(t1, time(line))
if d >= thresh:
print t1, d, connidre.search(line).group(1)
maxd = max(maxd, d)
t1 = None
print maxd
def xopen(f):
if f == '-':
return sys.stdin
if ' ' in f:
return os.popen(f, 'r')
return open(f)
def time_tpc(f):
f, thresh = f
if f == '-':
f = sys.stdin
else:
f = xopen(f)
thresh = float(thresh)
transactions = {}
for line in f:
line = line.strip()
if ' calling vote(' in line:
cid = connidre.search(line).group(1)
transactions[cid] = time(line),
elif ' vote returns None' in line:
cid = connidre.search(line).group(1)
transactions[cid] += time(line), 'n'
elif ' vote() raised' in line:
cid = connidre.search(line).group(1)
transactions[cid] += time(line), 'e'
elif ' vote returns ' in line:
# delayed, skip
cid = connidre.search(line).group(1)
transactions[cid] += time(line), 'd'
elif ' calling tpc_abort(' in line:
cid = connidre.search(line).group(1)
if cid in transactions:
t1, t2, vs = transactions[cid]
t = time(line)
d = sub(t1, t)
if d >= thresh:
print 'a', t1, cid, sub(t1, t2), vs, sub(t2, t)
del transactions[cid]
elif ' calling tpc_finish(' in line:
            cid = connidre.search(line).group(1)
            if cid in transactions:
                transactions[cid] += time(line),
elif ' tpc_finish returns ' in line:
if cid in transactions:
t1, t2, vs, t3 = transactions[cid]
t = time(line)
d = sub(t1, t)
if d >= thresh:
print 'c', t1, cid, sub(t1, t2), vs, sub(t2, t3), sub(t3, t)
del transactions[cid]
newobre = re.compile(r"storea\(.*, '\\x00\\x00\\x00\\x00\\x00")
def time_trans(f):
f, thresh = f
if f == '-':
f = sys.stdin
else:
f = xopen(f)
thresh = float(thresh)
transactions = {}
for line in f:
line = line.strip()
if ' calling tpc_begin(' in line:
cid = connidre.search(line).group(1)
transactions[cid] = time(line), [0, 0]
if ' calling storea(' in line:
cid = connidre.search(line).group(1)
if cid in transactions:
transactions[cid][1][0] += 1
if not newobre.search(line):
transactions[cid][1][1] += 1
elif ' calling vote(' in line:
cid = connidre.search(line).group(1)
if cid in transactions:
transactions[cid] += time(line),
elif ' vote returns None' in line:
cid = connidre.search(line).group(1)
if cid in transactions:
transactions[cid] += time(line), 'n'
elif ' vote() raised' in line:
cid = connidre.search(line).group(1)
if cid in transactions:
transactions[cid] += time(line), 'e'
elif ' vote returns ' in line:
# delayed, skip
cid = connidre.search(line).group(1)
if cid in transactions:
transactions[cid] += time(line), 'd'
elif ' calling tpc_abort(' in line:
cid = connidre.search(line).group(1)
if cid in transactions:
try:
t0, (stores, old), t1, t2, vs = transactions[cid]
except ValueError:
pass
else:
t = time(line)
d = sub(t1, t)
if d >= thresh:
print t1, cid, "%s/%s" % (stores, old), \
sub(t0, t1), sub(t1, t2), vs, \
sub(t2, t), 'abort'
del transactions[cid]
elif ' calling tpc_finish(' in line:
            cid = connidre.search(line).group(1)
            if cid in transactions:
                transactions[cid] += time(line),
elif ' tpc_finish returns ' in line:
if cid in transactions:
t0, (stores, old), t1, t2, vs, t3 = transactions[cid]
t = time(line)
d = sub(t1, t)
if d >= thresh:
print t1, cid, "%s/%s" % (stores, old), \
sub(t0, t1), sub(t1, t2), vs, \
sub(t2, t3), sub(t3, t)
del transactions[cid]
def minute(f, slice=16, detail=1, summary=1):
f, = f
if f == '-':
f = sys.stdin
else:
f = xopen(f)
cols = ["time", "reads", "stores", "commits", "aborts", "txns"]
fmt = "%18s %6s %6s %7s %6s %6s"
print fmt % cols
print fmt % ["-"*len(col) for col in cols]
mlast = r = s = c = a = cl = None
rs = []
ss = []
cs = []
as = []
ts = []
cls = []
for line in f:
line = line.strip()
if (line.find('returns') > 0
or line.find('storea') > 0
or line.find('tpc_abort') > 0
):
client = connidre.search(line).group(1)
m = line[:slice]
if m != mlast:
if mlast:
if detail:
print fmt % (mlast, len(cl), r, s, c, a, a+c)
cls.append(len(cl))
rs.append(r)
ss.append(s)
cs.append(c)
as.append(a)
ts.append(c+a)
mlast = m
r = s = c = a = 0
cl = {}
if line.find('zeoLoad') > 0:
r += 1
cl[client] = 1
elif line.find('storea') > 0:
s += 1
cl[client] = 1
elif line.find('tpc_finish') > 0:
c += 1
cl[client] = 1
elif line.find('tpc_abort') > 0:
a += 1
cl[client] = 1
if mlast:
if detail:
print fmt % (mlast, len(cl), r, s, c, a, a+c)
cls.append(len(cl))
rs.append(r)
ss.append(s)
cs.append(c)
as.append(a)
ts.append(c+a)
if summary:
print
print 'Summary: \t', '\t'.join(('min', '10%', '25%', 'med',
'75%', '90%', 'max', 'mean'))
print "n=%6d\t" % len(cls), '-'*62
print 'Clients: \t', '\t'.join(map(str,stats(cls)))
print 'Reads: \t', '\t'.join(map(str,stats( rs)))
print 'Stores: \t', '\t'.join(map(str,stats( ss)))
print 'Commits: \t', '\t'.join(map(str,stats( cs)))
print 'Aborts: \t', '\t'.join(map(str,stats( as)))
print 'Trans: \t', '\t'.join(map(str,stats( ts)))
def stats(s):
s.sort()
min = s[0]
max = s[-1]
n = len(s)
out = [min]
ni = n + 1
for p in .1, .25, .5, .75, .90:
lp = ni*p
l = int(lp)
if lp < 1 or lp > n:
out.append('-')
elif abs(lp-l) < .00001:
out.append(s[l-1])
else:
out.append(int(s[l-1] + (lp - l) * (s[l] - s[l-1])))
mean = 0.0
for v in s:
mean += v
out.extend([max, int(mean/n)])
return out
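# Example: stats() takes a list of per-interval counts (the list is sorted in
# place) and returns [min, 10%, 25%, median, 75%, 90%, max, mean], using linear
# interpolation and '-' for percentiles that fall outside the sample.
def _stats_example():
    return stats([12, 7, 3, 9, 15, 4, 8, 11, 6, 10, 5])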
def minutes(f):
minute(f, 16, detail=0)
def hour(f):
minute(f, 13)
def day(f):
minute(f, 10)
def hours(f):
minute(f, 13, detail=0)
def days(f):
minute(f, 10, detail=0)
new_connection_idre = re.compile(r"new connection \('(\d+.\d+.\d+.\d+)', (\d+)\):")
def verify(f):
f, = f
if f == '-':
f = sys.stdin
else:
f = xopen(f)
t1 = None
nv = {}
for line in f:
if line.find('new connection') > 0:
m = new_connection_idre.search(line)
cid = "%s:%s" % (m.group(1), m.group(2))
nv[cid] = [time(line), 0]
elif line.find('calling zeoVerify(') > 0:
cid = connidre.search(line).group(1)
nv[cid][1] += 1
elif line.find('calling endZeoVerify()') > 0:
cid = connidre.search(line).group(1)
t1, n = nv[cid]
if n:
d = sub(t1, time(line))
print cid, t1, n, d, n and (d*1000.0/n) or '-'
def recovery(f):
f, = f
if f == '-':
f = sys.stdin
else:
f = xopen(f)
last = ''
trans = []
n = 0
for line in f:
n += 1
if line.find('RecoveryServer') < 0:
continue
l = line.find('sending transaction ')
if l > 0 and last.find('sending transaction ') > 0:
trans.append(line[l+20:].strip())
else:
if trans:
if len(trans) > 1:
print " ... %s similar records skipped ..." % (
len(trans) - 1)
print n, last.strip()
trans=[]
print n, line.strip()
last = line
if len(trans) > 1:
print " ... %s similar records skipped ..." % (
len(trans) - 1)
print n, last.strip()
if __name__ == '__main__':
globals()[sys.argv[1]](sys.argv[2:])
#!/usr/bin/env python2.3
"""Make sure a ZEO server is running.
usage: zeoup.py [options]
The test will connect to a ZEO server, load the root object, and attempt to
update the zeoup counter in the root. It will report success if it updates
the counter or if it gets a ConflictError. A ConflictError is considered a
success, because the client was able to start a transaction.
Options:
-p port -- port to connect to
-h host -- host to connect to (default is current host)
-S storage -- storage name (default '1')
-U path -- Unix-domain socket to connect to
--nowrite -- Do not update the zeoup counter.
-1 -- Connect to a ZEO 1.0 server.
You must specify either -p and -h or -U.
"""
import getopt
import socket
import sys
import time
from persistent.mapping import PersistentMapping
import transaction
import ZODB
from ZODB.POSException import ConflictError
from ZODB.tests.MinPO import MinPO
from ZEO.ClientStorage import ClientStorage
ZEO_VERSION = 2
def check_server(addr, storage, write):
t0 = time.time()
if ZEO_VERSION == 2:
# XXX should do retries w/ exponential backoff
cs = ClientStorage(addr, storage=storage, wait=0,
read_only=(not write))
else:
cs = ClientStorage(addr, storage=storage, debug=1,
wait_for_server_on_startup=1)
# _startup() is an artifact of the way ZEO 1.0 works. The
# ClientStorage doesn't get fully initialized until registerDB()
# is called. The only thing we care about, though, is that
# registerDB() calls _startup().
if write:
db = ZODB.DB(cs)
cn = db.open()
root = cn.root()
try:
# We store the data in a special `monitor' dict under the root,
# where other tools may also store such heartbeat and bookkeeping
# type data.
monitor = root.get('monitor')
if monitor is None:
monitor = root['monitor'] = PersistentMapping()
obj = monitor['zeoup'] = monitor.get('zeoup', MinPO(0))
obj.value += 1
transaction.commit()
except ConflictError:
pass
cn.close()
db.close()
else:
data, serial = cs.load("\0\0\0\0\0\0\0\0", "")
cs.close()
t1 = time.time()
print "Elapsed time: %.2f" % (t1 - t0)
def usage(exit=1):
print __doc__
print " ".join(sys.argv)
sys.exit(exit)
def main():
    global ZEO_VERSION
    host = None
port = None
unix = None
write = 1
storage = '1'
try:
opts, args = getopt.getopt(sys.argv[1:], 'p:h:U:S:1',
['nowrite'])
for o, a in opts:
if o == '-p':
port = int(a)
elif o == '-h':
host = a
elif o == '-U':
unix = a
elif o == '-S':
storage = a
elif o == '--nowrite':
write = 0
elif o == '-1':
ZEO_VERSION = 1
except Exception, err:
s = str(err)
if s:
s = ": " + s
print err.__class__.__name__ + s
usage()
if unix is not None:
addr = unix
else:
if host is None:
host = socket.gethostname()
if port is None:
usage()
addr = host, port
check_server(addr, storage, write)
if __name__ == "__main__":
try:
main()
except SystemExit:
raise
except Exception, err:
s = str(err)
if s:
s = ": " + s
print err.__class__.__name__ + s
sys.exit(1)
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Test script for testing ZODB under a heavy zope-like load.
Note that, to be as realistic as possible with ZEO, you should run this
script multiple times, to simulate multiple clients.
Here's how this works.
The script starts some number of threads. Each thread sequentially
executes jobs. There is a job producer that produces jobs.
Input data are provided by a mail producer that hands out messages from
a mailbox.
Execution continues until there is an error, which will normally occur
when the mailbox is exhausted.
Command-line options are used to provide job definitions. Job
definitions have parameters of the form name=value. Jobs have 2
standard parameters:
frequency=integer
The frequency of the job. The default is 1.
sleep=float
The number of seconds to sleep before performing the job. The
default is 0.
Usage: loadmail2 [options]
Options:
-edit [frequency=integer] [sleep=float]
Define an edit job. An edit job edits a random already-saved
email message, deleting and inserting a random number of words.
After editing the message, the message is (re)cataloged.
-insert [number=int] [frequency=integer] [sleep=float]
Insert some number of email messages.
-index [number=int] [frequency=integer] [sleep=float]
Insert and index (catalog) some number of email messages.
-search [terms='word1 word2 ...'] [frequency=integer] [sleep=float]
Search the catalog. A query is given with one or more terms as
would be entered into a typical search box. If no query is
given, then queries will be randomly selected from a built-in
word list.
-setup
Set up the database. This will delete any existing Data.fs
file. (Of course, this may have no effect, if there is a
custom_zodb that defined a different storage.) It also adds a
mail folder and a catalog.
-options file
Read options from the given file. The file should be a Python
source file that defines a sequence of options named 'options'.
-threads n
Specify the number of threads to execute. If not specified (< 2),
then jobs are run in a single (main) thread.
-mbox filename
Specify the mailbox for getting input data.
$Id: zodbload.py,v 1.7 2004/04/16 16:00:56 jeremy Exp $
"""
import mailbox
import math
import os
import random
import re
import sys
import threading
import time
import transaction
class JobProducer:
def __init__(self):
self.jobs = []
def add(self, callable, frequency, sleep, repeatp=0):
self.jobs.extend([(callable, sleep, repeatp)] * int(frequency))
random.shuffle(self.jobs)
def next(self):
factory, sleep, repeatp = random.choice(self.jobs)
time.sleep(sleep)
callable, args = factory.create()
return factory, callable, args, repeatp
def __nonzero__(self):
return not not self.jobs
class MBox:
def __init__(self, filename):
if ' ' in filename:
filename, min, max = filename.split()
min = int(min)
max = int(max)
else:
min = max = 0
if filename.endswith('.bz2'):
f = os.popen("bunzip2 <"+filename, 'r')
            filename = filename[:-4]
else:
f = open(filename)
self._mbox = mb = mailbox.UnixMailbox(f)
self.number = min
while min:
mb.next()
min -= 1
self._lock = threading.Lock()
self.__name__ = os.path.splitext(os.path.split(filename)[1])[0]
self._max = max
def next(self):
self._lock.acquire()
try:
if self._max > 0 and self.number >= self._max:
raise IndexError(self.number + 1)
message = self._mbox.next()
message.body = message.fp.read()
message.headers = list(message.headers)
self.number += 1
message.number = self.number
message.mbox = self.__name__
return message
finally:
self._lock.release()
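# Example (hypothetical path): MBox('/var/mail/sample.mbox') reads the whole
# mailbox, while MBox('/var/mail/sample.mbox 100 200') skips the first 100
# messages and stops after message 200.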
bins = 9973
#bins = 11
def mailfolder(app, mboxname, number):
mail = getattr(app, mboxname, None)
if mail is None:
app.manage_addFolder(mboxname)
mail = getattr(app, mboxname)
from BTrees.Length import Length
mail.length = Length()
for i in range(bins):
mail.manage_addFolder('b'+str(i))
bin = hash(str(number))%bins
return getattr(mail, 'b'+str(bin))
def VmSize():
try:
f = open('/proc/%s/status' % os.getpid())
except:
return 0
else:
l = filter(lambda l: l[:7] == 'VmSize:', f.readlines())
if l:
l = l[0][7:].strip().split()[0]
return int(l)
return 0
def setup(lib_python):
try:
os.remove(os.path.join(lib_python, '..', '..', 'var', 'Data.fs'))
except:
pass
import Zope
import Products
import AccessControl.SecurityManagement
app=Zope.app()
Products.ZCatalog.ZCatalog.manage_addZCatalog(app, 'cat', '')
from Products.ZCTextIndex.ZCTextIndex import PLexicon
from Products.ZCTextIndex.Lexicon import Splitter, CaseNormalizer
app.cat._setObject('lex',
PLexicon('lex', '', Splitter(), CaseNormalizer())
)
class extra:
doc_attr = 'PrincipiaSearchSource'
lexicon_id = 'lex'
index_type = 'Okapi BM25 Rank'
app.cat.addIndex('PrincipiaSearchSource', 'ZCTextIndex', extra)
transaction.commit()
system = AccessControl.SpecialUsers.system
AccessControl.SecurityManagement.newSecurityManager(None, system)
app._p_jar.close()
def do(db, f, args):
"""Do something in a transaction, retrying of necessary
Measure the speed of both the compurartion and the commit
"""
from ZODB.POSException import ConflictError
wcomp = ccomp = wcommit = ccommit = 0.0
rconflicts = wconflicts = 0
start = time.time()
while 1:
connection = db.open()
try:
transaction.begin()
t=time.time()
c=time.clock()
try:
try:
r = f(connection, *args)
except ConflictError:
rconflicts += 1
transaction.abort()
continue
finally:
wcomp += time.time() - t
ccomp += time.clock() - c
t=time.time()
c=time.clock()
try:
try:
transaction.commit()
break
except ConflictError:
wconflicts += 1
transaction.abort()
continue
finally:
wcommit += time.time() - t
ccommit += time.clock() - c
finally:
connection.close()
return start, wcomp, ccomp, rconflicts, wconflicts, wcommit, ccommit, r
def run1(tid, db, factory, job, args):
(start, wcomp, ccomp, rconflicts, wconflicts, wcommit, ccommit, r
) = do(db, job, args)
start = "%.4d-%.2d-%.2d %.2d:%.2d:%.2d" % time.localtime(start)[:6]
print "%s %s %8.3g %8.3g %s %s\t%8.3g %8.3g %s %r" % (
start, tid, wcomp, ccomp, rconflicts, wconflicts, wcommit, ccommit,
factory.__name__, r)
def run(jobs, tid=''):
import Zope
while 1:
factory, job, args, repeatp = jobs.next()
run1(tid, Zope.DB, factory, job, args)
if repeatp:
while 1:
i = random.randint(0,100)
if i > repeatp:
break
run1(tid, Zope.DB, factory, job, args)
def index(connection, messages, catalog):
app = connection.root()['Application']
for message in messages:
mail = mailfolder(app, message.mbox, message.number)
docid = 'm'+str(message.number)
mail.manage_addDTMLDocument(docid, file=message.body)
        # increment the message count on the mailbox folder
getattr(app, message.mbox).length.change(1)
doc = mail[docid]
for h in message.headers:
h = h.strip()
l = h.find(':')
if l <= 0:
continue
name = h[:l].lower()
if name=='subject':
name='title'
v = h[l+1:].strip()
type='string'
if name=='title':
doc.manage_changeProperties(title=h)
else:
try:
doc.manage_addProperty(name, v, type)
except:
pass
if catalog:
app.cat.catalog_object(doc)
return message.number
class IndexJob:
needs_mbox = 1
catalog = 1
prefix = 'index'
def __init__(self, mbox, number=1):
self.__name__ = "%s%s_%s" % (self.prefix, number, mbox.__name__)
self.mbox, self.number = mbox, int(number)
def create(self):
messages = [self.mbox.next() for i in range(self.number)]
return index, (messages, self.catalog)
class InsertJob(IndexJob):
catalog = 0
prefix = 'insert'
wordre = re.compile(r'(\w{3,20})')
stop = 'and', 'not'
def edit(connection, mbox, catalog=1):
app = connection.root()['Application']
mail = getattr(app, mbox.__name__, None)
if mail is None:
time.sleep(1)
return "No mailbox %s" % mbox.__name__
nmessages = mail.length()
if nmessages < 2:
time.sleep(1)
return "No messages to edit in %s" % mbox.__name__
# find a message to edit:
while 1:
number = random.randint(1, nmessages-1)
did = 'm' + str(number)
mail = mailfolder(app, mbox.__name__, number)
doc = getattr(mail, did, None)
if doc is not None:
break
text = doc.raw.split()
norig = len(text)
if norig > 10:
ndel = int(math.exp(random.randint(0, int(math.log(norig)))))
nins = int(math.exp(random.randint(0, int(math.log(norig)))))
else:
ndel = 0
nins = 10
for j in range(ndel):
j = random.randint(0,len(text)-1)
word = text[j]
m = wordre.search(word)
if m:
word = m.group(1).lower()
if (not wordsd.has_key(word)) and word not in stop:
words.append(word)
wordsd[word] = 1
del text[j]
for j in range(nins):
word = random.choice(words)
text.append(word)
doc.raw = ' '.join(text)
if catalog:
app.cat.catalog_object(doc)
return norig, ndel, nins
class EditJob:
needs_mbox = 1
prefix = 'edit'
catalog = 1
def __init__(self, mbox):
self.__name__ = "%s_%s" % (self.prefix, mbox.__name__)
self.mbox = mbox
def create(self):
return edit, (self.mbox, self.catalog)
class ModifyJob(EditJob):
prefix = 'modify'
catalog = 0
def search(connection, terms, number):
app = connection.root()['Application']
cat = app.cat
n = 0
for i in number:
term = random.choice(terms)
results = cat(PrincipiaSearchSource=term)
n += len(results)
for result in results:
obj = result.getObject()
# Apparently, there is a bug in Zope that leads obj to be None
# on occasion.
if obj is not None:
obj.getId()
return n
class SearchJob:
def __init__(self, terms='', number=10):
if terms:
terms = terms.split()
self.__name__ = "search_" + '_'.join(terms)
self.terms = terms
else:
self.__name__ = 'search'
self.terms = words
number = min(int(number), len(self.terms))
self.number = range(number)
def create(self):
return search, (self.terms, self.number)
words=['banishment', 'indirectly', 'imprecise', 'peeks',
'opportunely', 'bribe', 'sufficiently', 'Occidentalized', 'elapsing',
'fermenting', 'listen', 'orphanage', 'younger', 'draperies', 'Ida',
'cuttlefish', 'mastermind', 'Michaels', 'populations', 'lent',
'cater', 'attentional', 'hastiness', 'dragnet', 'mangling',
'scabbards', 'princely', 'star', 'repeat', 'deviation', 'agers',
'fix', 'digital', 'ambitious', 'transit', 'jeeps', 'lighted',
'Prussianizations', 'Kickapoo', 'virtual', 'Andrew', 'generally',
'boatsman', 'amounts', 'promulgation', 'Malay', 'savaging',
'courtesan', 'nursed', 'hungered', 'shiningly', 'ship', 'presides',
'Parke', 'moderns', 'Jonas', 'unenlightening', 'dearth', 'deer',
'domesticates', 'recognize', 'gong', 'penetrating', 'dependents',
'unusually', 'complications', 'Dennis', 'imbalances', 'nightgown',
'attached', 'testaments', 'congresswoman', 'circuits', 'bumpers',
'braver', 'Boreas', 'hauled', 'Howe', 'seethed', 'cult', 'numismatic',
'vitality', 'differences', 'collapsed', 'Sandburg', 'inches', 'head',
'rhythmic', 'opponent', 'blanketer', 'attorneys', 'hen', 'spies',
'indispensably', 'clinical', 'redirection', 'submit', 'catalysts',
'councilwoman', 'kills', 'topologies', 'noxious', 'exactions',
'dashers', 'balanced', 'slider', 'cancerous', 'bathtubs', 'legged',
'respectably', 'crochets', 'absenteeism', 'arcsine', 'facility',
'cleaners', 'bobwhite', 'Hawkins', 'stockade', 'provisional',
'tenants', 'forearms', 'Knowlton', 'commit', 'scornful',
'pediatrician', 'greets', 'clenches', 'trowels', 'accepts',
'Carboloy', 'Glenn', 'Leigh', 'enroll', 'Madison', 'Macon', 'oiling',
'entertainingly', 'super', 'propositional', 'pliers', 'beneficiary',
'hospitable', 'emigration', 'sift', 'sensor', 'reserved',
'colonization', 'shrilled', 'momentously', 'stevedore', 'Shanghaiing',
'schoolmasters', 'shaken', 'biology', 'inclination', 'immoderate',
'stem', 'allegory', 'economical', 'daytime', 'Newell', 'Moscow',
'archeology', 'ported', 'scandals', 'Blackfoot', 'leery', 'kilobit',
'empire', 'obliviousness', 'productions', 'sacrificed', 'ideals',
'enrolling', 'certainties', 'Capsicum', 'Brookdale', 'Markism',
'unkind', 'dyers', 'legislates', 'grotesquely', 'megawords',
'arbitrary', 'laughing', 'wildcats', 'thrower', 'sex', 'devils',
'Wehr', 'ablates', 'consume', 'gossips', 'doorways', 'Shari',
'advanced', 'enumerable', 'existentially', 'stunt', 'auctioneers',
'scheduler', 'blanching', 'petulance', 'perceptibly', 'vapors',
'progressed', 'rains', 'intercom', 'emergency', 'increased',
'fluctuating', 'Krishna', 'silken', 'reformed', 'transformation',
'easter', 'fares', 'comprehensible', 'trespasses', 'hallmark',
'tormenter', 'breastworks', 'brassiere', 'bladders', 'civet', 'death',
'transformer', 'tolerably', 'bugle', 'clergy', 'mantels', 'satin',
'Boswellizes', 'Bloomington', 'notifier', 'Filippo', 'circling',
'unassigned', 'dumbness', 'sentries', 'representativeness', 'souped',
'Klux', 'Kingstown', 'gerund', 'Russell', 'splices', 'bellow',
'bandies', 'beefers', 'cameramen', 'appalled', 'Ionian', 'butterball',
'Portland', 'pleaded', 'admiringly', 'pricks', 'hearty', 'corer',
'deliverable', 'accountably', 'mentors', 'accorded',
'acknowledgement', 'Lawrenceville', 'morphology', 'eucalyptus',
'Rena', 'enchanting', 'tighter', 'scholars', 'graduations', 'edges',
'Latinization', 'proficiency', 'monolithic', 'parenthesizing', 'defy',
'shames', 'enjoyment', 'Purdue', 'disagrees', 'barefoot', 'maims',
'flabbergast', 'dishonorable', 'interpolation', 'fanatics', 'dickens',
'abysses', 'adverse', 'components', 'bowl', 'belong', 'Pipestone',
'trainees', 'paw', 'pigtail', 'feed', 'whore', 'conditioner',
'Volstead', 'voices', 'strain', 'inhabits', 'Edwin', 'discourses',
'deigns', 'cruiser', 'biconvex', 'biking', 'depreciation', 'Harrison',
'Persian', 'stunning', 'agar', 'rope', 'wagoner', 'elections',
'reticulately', 'Cruz', 'pulpits', 'wilt', 'peels', 'plants',
'administerings', 'deepen', 'rubs', 'hence', 'dissension', 'implored',
'bereavement', 'abyss', 'Pennsylvania', 'benevolent', 'corresponding',
'Poseidon', 'inactive', 'butchers', 'Mach', 'woke', 'loading',
'utilizing', 'Hoosier', 'undo', 'Semitization', 'trigger', 'Mouthe',
'mark', 'disgracefully', 'copier', 'futility', 'gondola', 'algebraic',
'lecturers', 'sponged', 'instigators', 'looted', 'ether', 'trust',
'feeblest', 'sequencer', 'disjointness', 'congresses', 'Vicksburg',
'incompatibilities', 'commend', 'Luxembourg', 'reticulation',
'instructively', 'reconstructs', 'bricks', 'attache', 'Englishman',
'provocation', 'roughen', 'cynic', 'plugged', 'scrawls', 'antipode',
'injected', 'Daedalus', 'Burnsides', 'asker', 'confronter',
'merriment', 'disdain', 'thicket', 'stinker', 'great', 'tiers',
'oust', 'antipodes', 'Macintosh', 'tented', 'packages',
'Mediterraneanize', 'hurts', 'orthodontist', 'seeder', 'readying',
'babying', 'Florida', 'Sri', 'buckets', 'complementary',
'cartographer', 'chateaus', 'shaves', 'thinkable', 'Tehran',
'Gordian', 'Angles', 'arguable', 'bureau', 'smallest', 'fans',
'navigated', 'dipole', 'bootleg', 'distinctive', 'minimization',
'absorbed', 'surmised', 'Malawi', 'absorbent', 'close', 'conciseness',
'hopefully', 'declares', 'descent', 'trick', 'portend', 'unable',
'mildly', 'Morse', 'reference', 'scours', 'Caribbean', 'battlers',
'astringency', 'likelier', 'Byronizes', 'econometric', 'grad',
'steak', 'Austrian', 'ban', 'voting', 'Darlington', 'bison', 'Cetus',
'proclaim', 'Gilbertson', 'evictions', 'submittal', 'bearings',
'Gothicizer', 'settings', 'McMahon', 'densities', 'determinants',
'period', 'DeKastere', 'swindle', 'promptness', 'enablers', 'wordy',
'during', 'tables', 'responder', 'baffle', 'phosgene', 'muttering',
'limiters', 'custodian', 'prevented', 'Stouffer', 'waltz', 'Videotex',
'brainstorms', 'alcoholism', 'jab', 'shouldering', 'screening',
'explicitly', 'earner', 'commandment', 'French', 'scrutinizing',
'Gemma', 'capacitive', 'sheriff', 'herbivore', 'Betsey', 'Formosa',
'scorcher', 'font', 'damming', 'soldiers', 'flack', 'Marks',
'unlinking', 'serenely', 'rotating', 'converge', 'celebrities',
'unassailable', 'bawling', 'wording', 'silencing', 'scotch',
'coincided', 'masochists', 'graphs', 'pernicious', 'disease',
'depreciates', 'later', 'torus', 'interject', 'mutated', 'causer',
'messy', 'Bechtel', 'redundantly', 'profoundest', 'autopsy',
'philosophic', 'iterate', 'Poisson', 'horridly', 'silversmith',
'millennium', 'plunder', 'salmon', 'missioner', 'advances', 'provers',
'earthliness', 'manor', 'resurrectors', 'Dahl', 'canto', 'gangrene',
'gabler', 'ashore', 'frictionless', 'expansionism', 'emphasis',
'preservations', 'Duane', 'descend', 'isolated', 'firmware',
'dynamites', 'scrawled', 'cavemen', 'ponder', 'prosperity', 'squaw',
'vulnerable', 'opthalmic', 'Simms', 'unite', 'totallers', 'Waring',
'enforced', 'bridge', 'collecting', 'sublime', 'Moore', 'gobble',
'criticizes', 'daydreams', 'sedate', 'apples', 'Concordia',
'subsequence', 'distill', 'Allan', 'seizure', 'Isadore', 'Lancashire',
'spacings', 'corresponded', 'hobble', 'Boonton', 'genuineness',
'artifact', 'gratuities', 'interviewee', 'Vladimir', 'mailable',
'Bini', 'Kowalewski', 'interprets', 'bereave', 'evacuated', 'friend',
'tourists', 'crunched', 'soothsayer', 'fleetly', 'Romanizations',
'Medicaid', 'persevering', 'flimsy', 'doomsday', 'trillion',
'carcasses', 'guess', 'seersucker', 'ripping', 'affliction',
'wildest', 'spokes', 'sheaths', 'procreate', 'rusticates', 'Schapiro',
'thereafter', 'mistakenly', 'shelf', 'ruination', 'bushel',
'assuredly', 'corrupting', 'federation', 'portmanteau', 'wading',
'incendiary', 'thing', 'wanderers', 'messages', 'Paso', 'reexamined',
'freeings', 'denture', 'potting', 'disturber', 'laborer', 'comrade',
'intercommunicating', 'Pelham', 'reproach', 'Fenton', 'Alva', 'oasis',
'attending', 'cockpit', 'scout', 'Jude', 'gagging', 'jailed',
'crustaceans', 'dirt', 'exquisitely', 'Internet', 'blocker', 'smock',
'Troutman', 'neighboring', 'surprise', 'midscale', 'impart',
'badgering', 'fountain', 'Essen', 'societies', 'redresses',
'afterwards', 'puckering', 'silks', 'Blakey', 'sequel', 'greet',
'basements', 'Aubrey', 'helmsman', 'album', 'wheelers', 'easternmost',
'flock', 'ambassadors', 'astatine', 'supplant', 'gird', 'clockwork',
'foxes', 'rerouting', 'divisional', 'bends', 'spacer',
'physiologically', 'exquisite', 'concerts', 'unbridled', 'crossing',
'rock', 'leatherneck', 'Fortescue', 'reloading', 'Laramie', 'Tim',
'forlorn', 'revert', 'scarcer', 'spigot', 'equality', 'paranormal',
'aggrieves', 'pegs', 'committeewomen', 'documented', 'interrupt',
'emerald', 'Battelle', 'reconverted', 'anticipated', 'prejudices',
'drowsiness', 'trivialities', 'food', 'blackberries', 'Cyclades',
'tourist', 'branching', 'nugget', 'Asilomar', 'repairmen', 'Cowan',
'receptacles', 'nobler', 'Nebraskan', 'territorial', 'chickadee',
'bedbug', 'darted', 'vigilance', 'Octavia', 'summands', 'policemen',
'twirls', 'style', 'outlawing', 'specifiable', 'pang', 'Orpheus',
'epigram', 'Babel', 'butyrate', 'wishing', 'fiendish', 'accentuate',
'much', 'pulsed', 'adorned', 'arbiters', 'counted', 'Afrikaner',
'parameterizes', 'agenda', 'Americanism', 'referenda', 'derived',
'liquidity', 'trembling', 'lordly', 'Agway', 'Dillon', 'propellers',
'statement', 'stickiest', 'thankfully', 'autograph', 'parallel',
'impulse', 'Hamey', 'stylistic', 'disproved', 'inquirer', 'hoisting',
'residues', 'variant', 'colonials', 'dequeued', 'especial', 'Samoa',
'Polaris', 'dismisses', 'surpasses', 'prognosis', 'urinates',
'leaguers', 'ostriches', 'calculative', 'digested', 'divided',
'reconfigurer', 'Lakewood', 'illegalities', 'redundancy',
'approachability', 'masterly', 'cookery', 'crystallized', 'Dunham',
'exclaims', 'mainline', 'Australianizes', 'nationhood', 'pusher',
'ushers', 'paranoia', 'workstations', 'radiance', 'impedes',
'Minotaur', 'cataloging', 'bites', 'fashioning', 'Alsop', 'servants',
'Onondaga', 'paragraph', 'leadings', 'clients', 'Latrobe',
'Cornwallis', 'excitingly', 'calorimetric', 'savior', 'tandem',
'antibiotics', 'excuse', 'brushy', 'selfish', 'naive', 'becomes',
'towers', 'popularizes', 'engender', 'introducing', 'possession',
'slaughtered', 'marginally', 'Packards', 'parabola', 'utopia',
'automata', 'deterrent', 'chocolates', 'objectives', 'clannish',
'aspirin', 'ferociousness', 'primarily', 'armpit', 'handfuls',
'dangle', 'Manila', 'enlivened', 'decrease', 'phylum', 'hardy',
'objectively', 'baskets', 'chaired', 'Sepoy', 'deputy', 'blizzard',
'shootings', 'breathtaking', 'sticking', 'initials', 'epitomized',
'Forrest', 'cellular', 'amatory', 'radioed', 'horrified', 'Neva',
'simultaneous', 'delimiter', 'expulsion', 'Himmler', 'contradiction',
'Remus', 'Franklinizations', 'luggage', 'moisture', 'Jews',
'comptroller', 'brevity', 'contradictions', 'Ohio', 'active',
'babysit', 'China', 'youngest', 'superstition', 'clawing', 'raccoons',
'chose', 'shoreline', 'helmets', 'Jeffersonian', 'papered',
'kindergarten', 'reply', 'succinct', 'split', 'wriggle', 'suitcases',
'nonce', 'grinders', 'anthem', 'showcase', 'maimed', 'blue', 'obeys',
'unreported', 'perusing', 'recalculate', 'rancher', 'demonic',
'Lilliputianize', 'approximation', 'repents', 'yellowness',
'irritates', 'Ferber', 'flashlights', 'booty', 'Neanderthal',
'someday', 'foregoes', 'lingering', 'cloudiness', 'guy', 'consumer',
'Berkowitz', 'relics', 'interpolating', 'reappearing', 'advisements',
'Nolan', 'turrets', 'skeletal', 'skills', 'mammas', 'Winsett',
'wheelings', 'stiffen', 'monkeys', 'plainness', 'braziers', 'Leary',
'advisee', 'jack', 'verb', 'reinterpret', 'geometrical', 'trolleys',
'arboreal', 'overpowered', 'Cuzco', 'poetical', 'admirations',
'Hobbes', 'phonemes', 'Newsweek', 'agitator', 'finally', 'prophets',
'environment', 'easterners', 'precomputed', 'faults', 'rankly',
'swallowing', 'crawl', 'trolley', 'spreading', 'resourceful', 'go',
'demandingly', 'broader', 'spiders', 'Marsha', 'debris', 'operates',
'Dundee', 'alleles', 'crunchier', 'quizzical', 'hanging', 'Fisk']
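# Build a dictionary keyed by the words above (the classic pre-set idiom
# for fast membership tests on the word list).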
wordsd = {}
for word in words:
    wordsd[word] = 1
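# Parse command-line arguments into the shared `options` dict and a list of
# job definitions.  A switch either loads more arguments from a file
# (-options <file>), sets a known option (-<name> <value>), requests setup
# (-setup), or names a job class (<Name>Job in globals()) followed by
# key=value parameters; the frequency, sleep, and repeat parameters are
# split out of the keyword dict and stored alongside the job definition.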
def collect_options(args, jobs, options):

    while args:
        arg = args.pop(0)
        if arg.startswith('-'):
            name = arg[1:]
            if name == 'options':
                fname = args.pop(0)
                d = {}
                execfile(fname, d)
                collect_options(list(d['options']), jobs, options)
            elif options.has_key(name):
                v = args.pop(0)
                if options[name] is not None:
                    raise ValueError(
                        "Duplicate values for %s, %s and %s"
                        % (name, v, options[name])
                        )
                options[name] = v
            elif name == 'setup':
                options['setup'] = 1
            elif globals().has_key(name.capitalize()+'Job'):
                job = name
                kw = {}
                while args and args[0].find("=") > 0:
                    arg = args.pop(0).split('=')
                    name, v = arg[0], '='.join(arg[1:])
                    if kw.has_key(name):
                        raise ValueError(
                            "Duplicate parameter %s for job %s"
                            % (name, job)
                            )
                    kw[name] = v

                if kw.has_key('frequency'):
                    frequency = kw['frequency']
                    del kw['frequency']
                else:
                    frequency = 1

                if kw.has_key('sleep'):
                    sleep = float(kw['sleep'])
                    del kw['sleep']
                else:
                    sleep = 0.0001

                if kw.has_key('repeat'):
                    repeatp = float(kw['repeat'])
                    del kw['repeat']
                else:
                    repeatp = 0

                jobs.append((job, kw, frequency, sleep, repeatp))
            else:
                raise ValueError("not an option or job", name)
        else:
            raise ValueError("Expected an option", arg)
def find_lib_python():
    for b in os.getcwd(), os.path.split(sys.argv[0])[0]:
        for i in range(6):
            d = ['..']*i + ['lib', 'python']
            p = os.path.join(b, *d)
            if os.path.isdir(p):
                return p
    raise ValueError("Couldn't find lib/python")
def main(args=None):
    lib_python = find_lib_python()
    sys.path.insert(0, lib_python)

    if args is None:
        args = sys.argv[1:]
    if not args:
        print __doc__
        sys.exit(0)

    print args
    random.seed(hash(tuple(args)))  # always use the same seed for the given args

    options = {"mbox": None, "threads": None}
    jobdefs = []
    collect_options(args, jobdefs, options)

    mboxes = {}
    if options["mbox"]:
        mboxes[options["mbox"]] = MBox(options["mbox"])

    if options.has_key('setup'):
        setup(lib_python)
    else:
        import Zope
        Zope.startup()

    #from ThreadedAsync.LoopCallback import loop
    #threading.Thread(target=loop, args=(), name='asyncore').start()

    jobs = JobProducer()
    for job, kw, frequency, sleep, repeatp in jobdefs:
        Job = globals()[job.capitalize()+'Job']
        if getattr(Job, 'needs_mbox', 0):
            if not kw.has_key("mbox"):
                if not options["mbox"]:
                    raise ValueError(
                        "no mailbox (mbox option) file specified")
                kw['mbox'] = mboxes[options["mbox"]]
            else:
                if not mboxes.has_key(kw["mbox"]):
                    mboxes[kw['mbox']] = MBox(kw['mbox'])
                kw["mbox"] = mboxes[kw['mbox']]

        jobs.add(Job(**kw), frequency, sleep, repeatp)

    if not jobs:
        print "No jobs to execute"
        return

    threads = int(options['threads'] or '0')

    if threads > 1:
        threads = [threading.Thread(target=run, args=(jobs, i), name=str(i))
                   for i in range(threads)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
    else:
        run(jobs)


if __name__ == '__main__':
    main()