Commit 1e506a81 authored by Kazuhiko Shiozaki, committed by Kirill Smelkov

zodbanalyze: now supports both FileStorage and repozo deltafs

/reviewed-on nexedi/zodbtools!1
/see-also nexedi/slapos!116
parent ab17cf2d
...@@ -8,5 +8,6 @@ scripts anymore. So we are here: ...@@ -8,5 +8,6 @@ scripts anymore. So we are here:
__ https://github.com/zopefoundation/ZODB/pull/128#issuecomment-260970932 __ https://github.com/zopefoundation/ZODB/pull/128#issuecomment-260970932
- `zodbanalyze` - analyze FileStorage or repozo deltafs usage.
- `zodbcmp` - compare content of two ZODB databases bit-to-bit. - `zodbcmp` - compare content of two ZODB databases bit-to-bit.
- `zodbdump` - dump content of a ZODB database. - `zodbdump` - dump content of a ZODB database.
...@@ -26,8 +26,9 @@ setup( ...@@ -26,8 +26,9 @@ setup(
# zodb cmd ... # zodb cmd ...
# zodb dump ... # zodb dump ...
entry_points= {'console_scripts': [ entry_points= {'console_scripts': [
'zodbcmp = zodbtool.zodbcmp:main', 'zodbanalyze = zodbtool.zodbanalyze:main',
'zodbdump = zodbtool.zodbdump:main', 'zodbcmp = zodbtool.zodbcmp:main',
'zodbdump = zodbtool.zodbdump:main',
] ]
}, },
......
#!/usr/bin/env python2.4 #!/usr/bin/env python
# Based on a transaction analyzer by Matt Kromer. # Based on a transaction analyzer by Matt Kromer.
...@@ -8,12 +8,43 @@ import getopt ...@@ -8,12 +8,43 @@ import getopt
import anydbm as dbm import anydbm as dbm
import tempfile import tempfile
import shutil import shutil
from ZODB.FileStorage import FileStorage from ZODB.FileStorage import FileIterator, FileStorage, packed_version
from ZODB.FileStorage.format import FileStorageFormatter
from ZODB.utils import get_pickle_metadata from ZODB.utils import get_pickle_metadata
class DeltaFileStorage(FileStorageFormatter):
    """Minimal storage-like wrapper around a repozo deltafs file.

    The constructor only remembers the path it was given; ``iterator()``
    builds a ``DeltaFileIterator`` for that path on demand.
    """

    def __init__(self, file_name, **kw):
        # Extra keyword arguments (the caller in this file passes
        # ``read_only=1``) are accepted so the class can be constructed
        # the same way as FileStorage, but they are ignored.
        self._file_name = file_name

    def iterator(self, start=None, stop=None):
        """Return a DeltaFileIterator for the stored path.

        ``start`` and ``stop`` are forwarded to the iterator unchanged.
        """
        path = self._file_name
        return DeltaFileIterator(path, start, stop)
class DeltaFileIterator(FileIterator):
def __init__(self, filename, start=None, stop=None, pos=0L):
assert isinstance(filename, str)
file = open(filename, 'rb')
self._file = file
file.seek(0,2)
self._file_size = file.tell()
if pos > self._file_size:
raise ValueError("Given position is greater than the file size",
pos, self._file_size)
self._pos = pos
assert start is None or isinstance(start, str)
assert stop is None or isinstance(stop, str)
self._start = start
self._stop = stop
if start:
if self._file_size <= 4:
return
self._skip_to_start(start)
class Report: class Report:
def __init__(self, use_dbm=False): def __init__(self, use_dbm=False, delta_fs=False):
self.use_dbm = use_dbm self.use_dbm = use_dbm
self.delta_fs = delta_fs
if use_dbm: if use_dbm:
self.temp_dir = tempfile.mkdtemp() self.temp_dir = tempfile.mkdtemp()
self.OIDMAP = dbm.open(os.path.join(self.temp_dir, 'oidmap.db'), self.OIDMAP = dbm.open(os.path.join(self.temp_dir, 'oidmap.db'),
...@@ -52,6 +83,7 @@ def shorten(s, n): ...@@ -52,6 +83,7 @@ def shorten(s, n):
return "..." + s return "..." + s
def report(rep, csv=False): def report(rep, csv=False):
delta_fs = rep.delta_fs
if not csv: if not csv:
print "Processed %d records in %d transactions" % (rep.OIDS, rep.TIDS) print "Processed %d records in %d transactions" % (rep.OIDS, rep.TIDS)
print "Average record size is %7.2f bytes" % (rep.DBYTES * 1.0 / rep.OIDS) print "Average record size is %7.2f bytes" % (rep.DBYTES * 1.0 / rep.OIDS)
...@@ -59,17 +91,28 @@ def report(rep, csv=False): ...@@ -59,17 +91,28 @@ def report(rep, csv=False):
(rep.DBYTES * 1.0 / rep.TIDS)) (rep.DBYTES * 1.0 / rep.TIDS))
print "Types used:" print "Types used:"
if csv: if delta_fs:
fmt = "%s,%s,%s,%s,%s,%s,%s,%s,%s" if csv:
fmtp = "%s,%d,%d,%f%%,%f,%d,%d,%d,%d" # per-class format fmt = "%s,%s,%s,%s,%s"
fmtp = "%s,%d,%d,%f%%,%f" # per-class format
else:
fmt = "%-46s %7s %9s %6s %7s"
fmtp = "%-46s %7d %9d %5.1f%% %7.2f" # per-class format
print fmt % ("Class Name", "T.Count", "T.Bytes", "Pct", "AvgSize")
if not csv:
print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7)
else: else:
fmt = "%-46s %7s %9s %6s %7s %7s %9s %7s %9s" if csv:
fmtp = "%-46s %7d %9d %5.1f%% %7.2f %7d %9d %7d %9d" # per-class format fmt = "%s,%s,%s,%s,%s,%s,%s,%s,%s"
fmtp = "%s,%d,%d,%f%%,%f,%d,%d,%d,%d" # per-class format
else:
fmt = "%-46s %7s %9s %6s %7s %7s %9s %7s %9s"
fmtp = "%-46s %7d %9d %5.1f%% %7.2f %7d %9d %7d %9d" # per-class format
print fmt % ("Class Name", "T.Count", "T.Bytes", "Pct", "AvgSize",
"C.Count", "C.Bytes", "O.Count", "O.Bytes")
if not csv:
print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7, '-'*7, '-'*9, '-'*7, '-'*9)
fmts = "%46s %7d %8dk %5.1f%% %7.2f" # summary format fmts = "%46s %7d %8dk %5.1f%% %7.2f" # summary format
print fmt % ("Class Name", "T.Count", "T.Bytes", "Pct", "AvgSize",
"C.Count", "C.Bytes", "O.Count", "O.Bytes")
if not csv:
print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7, '-'*7, '-'*9, '-'*7, '-'*9)
typemap = rep.TYPEMAP.keys() typemap = rep.TYPEMAP.keys()
typemap.sort(key=lambda a:rep.TYPESIZE[a]) typemap.sort(key=lambda a:rep.TYPESIZE[a])
cumpct = 0.0 cumpct = 0.0
...@@ -80,32 +123,46 @@ def report(rep, csv=False): ...@@ -80,32 +123,46 @@ def report(rep, csv=False):
t_display = t t_display = t
else: else:
t_display = shorten(t, 46) t_display = shorten(t, 46)
print fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t], if delta_fs:
pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t], print fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t],
rep.COIDSMAP[t], rep.CBYTESMAP[t], pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t])
rep.FOIDSMAP.get(t, 0), rep.FBYTESMAP.get(t, 0)) else:
print fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t],
pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t],
rep.COIDSMAP[t], rep.CBYTESMAP[t],
rep.FOIDSMAP.get(t, 0), rep.FBYTESMAP.get(t, 0))
if csv: if csv:
return return
print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7, '='*7, '='*9, '='*7, '='*9) if delta_fs:
print "%46s %7d %9s %6s %6.2fk" % ('Total Transactions', rep.TIDS, ' ', print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7)
' ', rep.DBYTES * 1.0 / rep.TIDS / 1024.0) print "%46s %7d %9s %6s %6.2f" % ('Total Transactions', rep.TIDS, ' ',
print fmts % ('Total Records', rep.OIDS, rep.DBYTES / 1024.0, cumpct, ' ', rep.DBYTES * 1.0 / rep.TIDS)
rep.DBYTES * 1.0 / rep.OIDS) print fmts % ('Total Records', rep.OIDS, rep.DBYTES, cumpct,
rep.DBYTES * 1.0 / rep.OIDS)
print fmts % ('Current Objects', rep.COIDS, rep.CBYTES / 1024.0, else:
rep.CBYTES * 100.0 / rep.DBYTES, print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7, '='*7, '='*9, '='*7, '='*9)
rep.CBYTES * 1.0 / rep.COIDS) print "%46s %7d %9s %6s %6.2fk" % ('Total Transactions', rep.TIDS, ' ',
if rep.FOIDS: ' ', rep.DBYTES * 1.0 / rep.TIDS / 1024.0)
print fmts % ('Old Objects', rep.FOIDS, rep.FBYTES / 1024.0, print fmts % ('Total Records', rep.OIDS, rep.DBYTES / 1024.0, cumpct,
rep.FBYTES * 100.0 / rep.DBYTES, rep.DBYTES * 1.0 / rep.OIDS)
rep.FBYTES * 1.0 / rep.FOIDS)
print fmts % ('Current Objects', rep.COIDS, rep.CBYTES / 1024.0,
def analyze(path, use_dbm): rep.CBYTES * 100.0 / rep.DBYTES,
fs = FileStorage(path, read_only=1) rep.CBYTES * 1.0 / rep.COIDS)
if rep.FOIDS:
print fmts % ('Old Objects', rep.FOIDS, rep.FBYTES / 1024.0,
rep.FBYTES * 100.0 / rep.DBYTES,
rep.FBYTES * 1.0 / rep.FOIDS)
def analyze(path, use_dbm, delta_fs):
if delta_fs:
fs = DeltaFileStorage(path, read_only=1)
else:
fs = FileStorage(path, read_only=1)
fsi = fs.iterator() fsi = fs.iterator()
report = Report(use_dbm) report = Report(use_dbm, delta_fs)
for txn in fsi: for txn in fsi:
analyze_trans(report, txn) analyze_trans(report, txn)
if use_dbm: if use_dbm:
...@@ -130,44 +187,52 @@ def analyze_rec(report, record): ...@@ -130,44 +187,52 @@ def analyze_rec(report, record):
try: try:
size = len(record.data) # Ignores various overhead size = len(record.data) # Ignores various overhead
report.DBYTES += size report.DBYTES += size
if oid not in report.OIDMAP: if report.delta_fs:
type = get_type(record) type = get_type(record)
report.OIDMAP[oid] = type report.TYPEMAP[type] = report.TYPEMAP.get(type, 0) + 1
if report.use_dbm: report.TYPESIZE[type] = report.TYPESIZE.get(type, 0) + size
report.USEDMAP[oid] = str(size)
else:
report.USEDMAP[oid] = size
report.COIDS += 1
report.CBYTES += size
report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size
else: else:
type = report.OIDMAP[oid] if oid not in report.OIDMAP:
if report.use_dbm: type = get_type(record)
fsize = int(report.USEDMAP[oid]) report.OIDMAP[oid] = type
report.USEDMAP[oid] = str(size) if report.use_dbm:
report.USEDMAP[oid] = str(size)
else:
report.USEDMAP[oid] = size
report.COIDS += 1
report.CBYTES += size
report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size
else: else:
fsize = report.USEDMAP[oid] type = report.OIDMAP[oid]
report.USEDMAP[oid] = size if report.use_dbm:
report.FOIDS += 1 fsize = int(report.USEDMAP[oid])
report.FBYTES += fsize report.USEDMAP[oid] = str(size)
report.CBYTES += size - fsize else:
report.FOIDSMAP[type] = report.FOIDSMAP.get(type, 0) + 1 fsize = report.USEDMAP[oid]
report.FBYTESMAP[type] = report.FBYTESMAP.get(type, 0) + fsize report.USEDMAP[oid] = size
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size - fsize report.FOIDS += 1
report.TYPEMAP[type] = report.TYPEMAP.get(type, 0) + 1 report.FBYTES += fsize
report.TYPESIZE[type] = report.TYPESIZE.get(type, 0) + size report.CBYTES += size - fsize
report.FOIDSMAP[type] = report.FOIDSMAP.get(type, 0) + 1
report.FBYTESMAP[type] = report.FBYTESMAP.get(type, 0) + fsize
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size - fsize
report.TYPEMAP[type] = report.TYPEMAP.get(type, 0) + 1
report.TYPESIZE[type] = report.TYPESIZE.get(type, 0) + size
except Exception, err: except Exception, err:
print err print err
__doc__ = """%(program)s: Data.fs analyzer __doc__ = """%(program)s: Analyzer for FileStorage data or repozo deltafs
usage: %(program)s [options] /path/to/Data.fs usage: %(program)s [options] /path/to/Data.fs (or /path/to/file.deltafs)
Options: Options:
-h, --help this help screen -h, --help this help screen
-c, --csv output CSV -c, --csv output CSV
-d, --dbm use DBM as temporary storage to limit memory usage -d, --dbm use DBM as temporary storage to limit memory usage
(no meaning for deltafs case)
Note:
Input deltafs file should be uncompressed.
""" """
def usage(stream, msg=None): def usage(stream, msg=None):
...@@ -196,7 +261,18 @@ def main(): ...@@ -196,7 +261,18 @@ def main():
if opt in ('-h', '--help'): if opt in ('-h', '--help'):
usage(sys.stdout) usage(sys.stdout)
sys.exit() sys.exit()
report(analyze(path, use_dbm), csv) header = open(path, 'rb').read(4)
if header == packed_version:
delta_fs = False
else:
delta_fs = True
# Wrap FileStorageFormatter._read_data_header so that every data header it
# returns has its ``tloc`` attribute overwritten with the reader's current
# ``_tpos`` before being handed back.
# NOTE(review): presumably needed because transaction offsets recorded in a
# repozo delta file do not match positions inside that file -- confirm.
original_read_data_header = FileStorageFormatter._read_data_header

def _read_data_header(self, pos, oid=None):
    header = original_read_data_header(self, pos, oid=oid)
    header.tloc = self._tpos
    return header

FileStorageFormatter._read_data_header = _read_data_header
report(analyze(path, use_dbm, delta_fs), csv)
if __name__ == "__main__": if __name__ == "__main__":
main() main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment