Commit 1e506a81 authored by Kazuhiko Shiozaki, committed by Kirill Smelkov

zodbanalyze: now supports both FileStorage and repozo deltafs

/reviewed-on nexedi/zodbtools!1
/see-also nexedi/slapos!116
parent ab17cf2d
......@@ -8,5 +8,6 @@ scripts anymore. So we are here:
__ https://github.com/zopefoundation/ZODB/pull/128#issuecomment-260970932
- `zodbanalyze` - analyze FileStorage or repozo deltafs usage.
- `zodbcmp` - compare content of two ZODB databases bit-to-bit.
- `zodbdump` - dump content of a ZODB database.
......@@ -26,8 +26,9 @@ setup(
# zodb cmd ...
# zodb dump ...
entry_points= {'console_scripts': [
'zodbcmp = zodbtool.zodbcmp:main',
'zodbdump = zodbtool.zodbdump:main',
'zodbanalyze = zodbtool.zodbanalyze:main',
'zodbcmp = zodbtool.zodbcmp:main',
'zodbdump = zodbtool.zodbdump:main',
]
},
......
#!/usr/bin/env python2.4
#!/usr/bin/env python
# Based on a transaction analyzer by Matt Kromer.
......@@ -8,12 +8,43 @@ import getopt
import anydbm as dbm
import tempfile
import shutil
from ZODB.FileStorage import FileStorage
from ZODB.FileStorage import FileIterator, FileStorage, packed_version
from ZODB.FileStorage.format import FileStorageFormatter
from ZODB.utils import get_pickle_metadata
class DeltaFileStorage(FileStorageFormatter):
    """Lightweight stand-in for FileStorage over a repozo deltafs file.

    It only remembers the path it was given and hands out a
    DeltaFileIterator on request; the file is not opened here.
    """

    def __init__(self, file_name, **kw):
        # Extra keyword arguments (the caller passes read_only=1, mirroring
        # the FileStorage(...) call) are accepted and ignored.
        self._file_name = file_name

    def iterator(self, start=None, stop=None):
        """Return a DeltaFileIterator for the file, passing start/stop through."""
        transactions = DeltaFileIterator(self._file_name, start, stop)
        return transactions
class DeltaFileIterator(FileIterator):
def __init__(self, filename, start=None, stop=None, pos=0L):
assert isinstance(filename, str)
file = open(filename, 'rb')
self._file = file
file.seek(0,2)
self._file_size = file.tell()
if pos > self._file_size:
raise ValueError("Given position is greater than the file size",
pos, self._file_size)
self._pos = pos
assert start is None or isinstance(start, str)
assert stop is None or isinstance(stop, str)
self._start = start
self._stop = stop
if start:
if self._file_size <= 4:
return
self._skip_to_start(start)
class Report:
def __init__(self, use_dbm=False):
def __init__(self, use_dbm=False, delta_fs=False):
self.use_dbm = use_dbm
self.delta_fs = delta_fs
if use_dbm:
self.temp_dir = tempfile.mkdtemp()
self.OIDMAP = dbm.open(os.path.join(self.temp_dir, 'oidmap.db'),
......@@ -52,6 +83,7 @@ def shorten(s, n):
return "..." + s
def report(rep, csv=False):
delta_fs = rep.delta_fs
if not csv:
print "Processed %d records in %d transactions" % (rep.OIDS, rep.TIDS)
print "Average record size is %7.2f bytes" % (rep.DBYTES * 1.0 / rep.OIDS)
......@@ -59,17 +91,28 @@ def report(rep, csv=False):
(rep.DBYTES * 1.0 / rep.TIDS))
print "Types used:"
if csv:
fmt = "%s,%s,%s,%s,%s,%s,%s,%s,%s"
fmtp = "%s,%d,%d,%f%%,%f,%d,%d,%d,%d" # per-class format
if delta_fs:
if csv:
fmt = "%s,%s,%s,%s,%s"
fmtp = "%s,%d,%d,%f%%,%f" # per-class format
else:
fmt = "%-46s %7s %9s %6s %7s"
fmtp = "%-46s %7d %9d %5.1f%% %7.2f" # per-class format
print fmt % ("Class Name", "T.Count", "T.Bytes", "Pct", "AvgSize")
if not csv:
print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7)
else:
fmt = "%-46s %7s %9s %6s %7s %7s %9s %7s %9s"
fmtp = "%-46s %7d %9d %5.1f%% %7.2f %7d %9d %7d %9d" # per-class format
if csv:
fmt = "%s,%s,%s,%s,%s,%s,%s,%s,%s"
fmtp = "%s,%d,%d,%f%%,%f,%d,%d,%d,%d" # per-class format
else:
fmt = "%-46s %7s %9s %6s %7s %7s %9s %7s %9s"
fmtp = "%-46s %7d %9d %5.1f%% %7.2f %7d %9d %7d %9d" # per-class format
print fmt % ("Class Name", "T.Count", "T.Bytes", "Pct", "AvgSize",
"C.Count", "C.Bytes", "O.Count", "O.Bytes")
if not csv:
print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7, '-'*7, '-'*9, '-'*7, '-'*9)
fmts = "%46s %7d %8dk %5.1f%% %7.2f" # summary format
print fmt % ("Class Name", "T.Count", "T.Bytes", "Pct", "AvgSize",
"C.Count", "C.Bytes", "O.Count", "O.Bytes")
if not csv:
print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7, '-'*7, '-'*9, '-'*7, '-'*9)
typemap = rep.TYPEMAP.keys()
typemap.sort(key=lambda a:rep.TYPESIZE[a])
cumpct = 0.0
......@@ -80,32 +123,46 @@ def report(rep, csv=False):
t_display = t
else:
t_display = shorten(t, 46)
print fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t],
pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t],
rep.COIDSMAP[t], rep.CBYTESMAP[t],
rep.FOIDSMAP.get(t, 0), rep.FBYTESMAP.get(t, 0))
if delta_fs:
print fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t],
pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t])
else:
print fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t],
pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t],
rep.COIDSMAP[t], rep.CBYTESMAP[t],
rep.FOIDSMAP.get(t, 0), rep.FBYTESMAP.get(t, 0))
if csv:
return
print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7, '='*7, '='*9, '='*7, '='*9)
print "%46s %7d %9s %6s %6.2fk" % ('Total Transactions', rep.TIDS, ' ',
' ', rep.DBYTES * 1.0 / rep.TIDS / 1024.0)
print fmts % ('Total Records', rep.OIDS, rep.DBYTES / 1024.0, cumpct,
rep.DBYTES * 1.0 / rep.OIDS)
print fmts % ('Current Objects', rep.COIDS, rep.CBYTES / 1024.0,
rep.CBYTES * 100.0 / rep.DBYTES,
rep.CBYTES * 1.0 / rep.COIDS)
if rep.FOIDS:
print fmts % ('Old Objects', rep.FOIDS, rep.FBYTES / 1024.0,
rep.FBYTES * 100.0 / rep.DBYTES,
rep.FBYTES * 1.0 / rep.FOIDS)
def analyze(path, use_dbm):
fs = FileStorage(path, read_only=1)
if delta_fs:
print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7)
print "%46s %7d %9s %6s %6.2f" % ('Total Transactions', rep.TIDS, ' ',
' ', rep.DBYTES * 1.0 / rep.TIDS)
print fmts % ('Total Records', rep.OIDS, rep.DBYTES, cumpct,
rep.DBYTES * 1.0 / rep.OIDS)
else:
print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7, '='*7, '='*9, '='*7, '='*9)
print "%46s %7d %9s %6s %6.2fk" % ('Total Transactions', rep.TIDS, ' ',
' ', rep.DBYTES * 1.0 / rep.TIDS / 1024.0)
print fmts % ('Total Records', rep.OIDS, rep.DBYTES / 1024.0, cumpct,
rep.DBYTES * 1.0 / rep.OIDS)
print fmts % ('Current Objects', rep.COIDS, rep.CBYTES / 1024.0,
rep.CBYTES * 100.0 / rep.DBYTES,
rep.CBYTES * 1.0 / rep.COIDS)
if rep.FOIDS:
print fmts % ('Old Objects', rep.FOIDS, rep.FBYTES / 1024.0,
rep.FBYTES * 100.0 / rep.DBYTES,
rep.FBYTES * 1.0 / rep.FOIDS)
def analyze(path, use_dbm, delta_fs):
if delta_fs:
fs = DeltaFileStorage(path, read_only=1)
else:
fs = FileStorage(path, read_only=1)
fsi = fs.iterator()
report = Report(use_dbm)
report = Report(use_dbm, delta_fs)
for txn in fsi:
analyze_trans(report, txn)
if use_dbm:
......@@ -130,44 +187,52 @@ def analyze_rec(report, record):
try:
size = len(record.data) # Ignores various overhead
report.DBYTES += size
if oid not in report.OIDMAP:
if report.delta_fs:
type = get_type(record)
report.OIDMAP[oid] = type
if report.use_dbm:
report.USEDMAP[oid] = str(size)
else:
report.USEDMAP[oid] = size
report.COIDS += 1
report.CBYTES += size
report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size
report.TYPEMAP[type] = report.TYPEMAP.get(type, 0) + 1
report.TYPESIZE[type] = report.TYPESIZE.get(type, 0) + size
else:
type = report.OIDMAP[oid]
if report.use_dbm:
fsize = int(report.USEDMAP[oid])
report.USEDMAP[oid] = str(size)
if oid not in report.OIDMAP:
type = get_type(record)
report.OIDMAP[oid] = type
if report.use_dbm:
report.USEDMAP[oid] = str(size)
else:
report.USEDMAP[oid] = size
report.COIDS += 1
report.CBYTES += size
report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size
else:
fsize = report.USEDMAP[oid]
report.USEDMAP[oid] = size
report.FOIDS += 1
report.FBYTES += fsize
report.CBYTES += size - fsize
report.FOIDSMAP[type] = report.FOIDSMAP.get(type, 0) + 1
report.FBYTESMAP[type] = report.FBYTESMAP.get(type, 0) + fsize
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size - fsize
report.TYPEMAP[type] = report.TYPEMAP.get(type, 0) + 1
report.TYPESIZE[type] = report.TYPESIZE.get(type, 0) + size
type = report.OIDMAP[oid]
if report.use_dbm:
fsize = int(report.USEDMAP[oid])
report.USEDMAP[oid] = str(size)
else:
fsize = report.USEDMAP[oid]
report.USEDMAP[oid] = size
report.FOIDS += 1
report.FBYTES += fsize
report.CBYTES += size - fsize
report.FOIDSMAP[type] = report.FOIDSMAP.get(type, 0) + 1
report.FBYTESMAP[type] = report.FBYTESMAP.get(type, 0) + fsize
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size - fsize
report.TYPEMAP[type] = report.TYPEMAP.get(type, 0) + 1
report.TYPESIZE[type] = report.TYPESIZE.get(type, 0) + size
except Exception, err:
print err
__doc__ = """%(program)s: Data.fs analyzer
__doc__ = """%(program)s: Analyzer for FileStorage data or repozo deltafs
usage: %(program)s [options] /path/to/Data.fs
usage: %(program)s [options] /path/to/Data.fs (or /path/to/file.deltafs)
Options:
-h, --help this help screen
-c, --csv output CSV
-d, --dbm use DBM as temporary storage to limit memory usage
(no meaning for deltafs case)
Note:
Input deltafs file should be uncompressed.
"""
def usage(stream, msg=None):
......@@ -196,7 +261,18 @@ def main():
if opt in ('-h', '--help'):
usage(sys.stdout)
sys.exit()
report(analyze(path, use_dbm), csv)
header = open(path, 'rb').read(4)
if header == packed_version:
delta_fs = False
else:
delta_fs = True
_orig_read_data_header = FileStorageFormatter._read_data_header
def _read_data_header(self, pos, oid=None):
h = _orig_read_data_header(self, pos, oid=oid)
h.tloc = self._tpos
return h
FileStorageFormatter._read_data_header = _read_data_header
report(analyze(path, use_dbm, delta_fs), csv)
if __name__ == "__main__":
main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment