Commit de61249b authored by Jeremy Hylton's avatar Jeremy Hylton

Add a quick hack that reports summary statistics from fsdump.

parent 1cf61122
#!python
"""Print details statistics from fsdump output."""
import re
import sys
rx_txn = re.compile("tid=([0-9a-f]+).*size=(\d+)")
rx_data = re.compile("oid=([0-9a-f]+) class=(\S+) size=(\d+)")
def sort_byhsize(seq, reverse=False):
L = [(v.size(), k, v) for k, v in seq]
L.sort()
if reverse:
L.reverse()
return [(k, v) for n, k, v in L]
class Histogram(dict):
def add(self, size):
self[size] = self.get(size, 0) + 1
def size(self):
return sum(self.itervalues())
def mean(self):
product = sum([k * v for k, v in self.iteritems()])
return product / self.size()
def median(self):
# close enough?
n = self.size() / 2
L = self.keys()
L.sort()
L.reverse()
while 1:
k = L.pop()
if self[k] > n:
return k
n -= self[k]
def mode(self):
mode = 0
value = 0
for k, v in self.iteritems():
if v > value:
value = v
mode = k
return mode
def make_bins(self, binsize):
maxkey = max(self.iterkeys())
self.binsize = binsize
self.bins = [0] * (1 + maxkey / binsize)
for k, v in self.iteritems():
b = k / binsize
self.bins[b] += v
def report(self, name, binsize=50, usebins=False, gaps=True, skip=True):
if usebins:
# Use existing bins with whatever size they have
binsize = self.binsize
else:
# Make new bins
self.make_bins(binsize)
maxval = max(self.bins)
# Print up to 40 dots for a value
dot = max(maxval / 40, 1)
tot = sum(self.bins)
print name
print "Total", tot,
print "Median", self.median(),
print "Mean", self.mean(),
print "Mode", self.mode(),
print "Max", max(self)
print "One * represents", dot
gap = False
cum = 0
for i, n in enumerate(self.bins):
if gaps and (not n or (skip and not n / dot)):
if not gap:
print " ..."
gap = True
continue
gap = False
p = 100 * n / tot
cum += n
pc = 100 * cum / tot
print "%6d %6d %3d%% %3d%% %s" % (
i * binsize, n, p, pc, "*" * (n / dot))
print
def class_detail(class_size):
# summary of classes
fmt = "%5s %6s %6s %6s %-50.50s"
labels = ["num", "median", "mean", "mode", "class"]
print fmt % tuple(labels)
print fmt % tuple(["-" * len(s) for s in labels])
for klass, h in sort_byhsize(class_size.iteritems()):
print fmt % (h.size(), h.median(), h.mean(), h.mode(), klass)
print
# per class details
for klass, h in sort_byhsize(class_size.iteritems(), reverse=True):
h.make_bins(50)
if len(filter(None, h.bins)) == 1:
continue
h.report("Object size for %s" % klass, usebins=True)
def revision_detail(lifetimes, classes):
# Report per-class details for any object modified more than once
for name, oids in classes.iteritems():
h = Histogram()
keep = False
for oid in dict.fromkeys(oids, 1):
L = lifetimes.get(oid)
n = len(L)
h.add(n)
if n > 1:
keep = True
if keep:
h.report("Number of revisions for %s" % name, binsize=10)
def main(path):
txn_objects = Histogram() # histogram of txn size in objects
txn_bytes = Histogram() # histogram of txn size in bytes
obj_size = Histogram() # histogram of object size
n_updates = Histogram() # oid -> num updates
n_classes = Histogram() # class -> num objects
lifetimes = {} # oid -> list of tids
class_size = {} # class -> histogram of object size
classes = {} # class -> list of oids
MAX = 0
tid = None
f = open(path, "rb")
for i, line in enumerate(f):
if MAX and i > MAX:
break
if line.startswith(" data"):
m = rx_data.search(line)
if not m:
continue
oid, klass, size = m.groups()
size = int(size)
obj_size.add(size)
n_updates.add(oid)
n_classes.add(klass)
h = class_size.get(klass)
if h is None:
h = class_size[klass] = Histogram()
h.add(size)
L = lifetimes.setdefault(oid, [])
L.append(tid)
L = classes.setdefault(klass, [])
L.append(oid)
objects += 1
elif line.startswith("Trans"):
if tid is not None:
txn_objects.add(objects)
m = rx_txn.search(line)
if not m:
continue
tid, size = m.groups()
size = int(size)
objects = 0
txn_bytes.add(size)
f.close()
print "Summary: %d txns, %d objects, %d revisions" % (
txn_objects.size(), len(n_updates), n_updates.size())
print
txn_bytes.report("Transaction size (bytes)", binsize=1024)
txn_objects.report("Transaction size (objects)", binsize=10)
obj_size.report("Object size", binsize=128)
# object lifetime info
h = Histogram()
for k, v in lifetimes.items():
h.add(len(v))
h.report("Number of revisions", binsize=10, skip=False)
# details about revisions
revision_detail(lifetimes, classes)
class_detail(class_size)
if __name__ == "__main__":
main(sys.argv[1])
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment