Commit bf026b27 authored by Kazuhiko Shiozaki

Support using DBM as temporary storage to limit memory usage; enable it with '-d' or '--dbm'.

parent b4647d63
...@@ -5,15 +5,26 @@ ...@@ -5,15 +5,26 @@
import sys import sys
import os import os
import getopt import getopt
import anydbm as dbm
import tempfile
import shutil
from ZODB.FileStorage import FileStorage from ZODB.FileStorage import FileStorage
from ZODB.utils import get_pickle_metadata from ZODB.utils import get_pickle_metadata
class Report: class Report:
def __init__(self): def __init__(self, use_dbm=False):
self.OIDMAP = {} self.use_dbm = use_dbm
if use_dbm:
self.temp_dir = tempfile.mkdtemp()
self.OIDMAP = dbm.open(os.path.join(self.temp_dir, 'oidmap.db'),
'nf')
self.USEDMAP = dbm.open(os.path.join(self.temp_dir, 'usedmap.db'),
'nf')
else:
self.OIDMAP = {}
self.USEDMAP = {}
self.TYPEMAP = {} self.TYPEMAP = {}
self.TYPESIZE = {} self.TYPESIZE = {}
self.USEDMAP = {}
self.TIDS = 0 self.TIDS = 0
self.OIDS = 0 self.OIDS = 0
self.DBYTES = 0 self.DBYTES = 0
...@@ -91,12 +102,14 @@ def report(rep, csv=False): ...@@ -91,12 +102,14 @@ def report(rep, csv=False):
rep.FBYTES * 100.0 / rep.DBYTES, rep.FBYTES * 100.0 / rep.DBYTES,
rep.FBYTES * 1.0 / rep.FOIDS) rep.FBYTES * 1.0 / rep.FOIDS)
def analyze(path, use_dbm=False):
    """Walk every transaction of the FileStorage at ``path`` and collect stats.

    path    -- filesystem path of the Data.fs file; it is opened read-only.
    use_dbm -- when true, the Report keeps its per-OID maps in DBM files
               inside a temporary directory instead of in-memory dicts.

    Returns the populated Report instance.
    """
    fs = FileStorage(path, read_only=1)
    report = Report(use_dbm)
    try:
        for txn in fs.iterator():
            analyze_trans(report, txn)
    finally:
        # Clean up even when a transaction fails to parse; otherwise the
        # temporary DBM directory would be left behind on every error.
        if use_dbm:
            shutil.rmtree(report.temp_dir)
        fs.close()
    return report
def analyze_trans(report, txn): def analyze_trans(report, txn):
...@@ -120,15 +133,22 @@ def analyze_rec(report, record): ...@@ -120,15 +133,22 @@ def analyze_rec(report, record):
if oid not in report.OIDMAP: if oid not in report.OIDMAP:
type = get_type(record) type = get_type(record)
report.OIDMAP[oid] = type report.OIDMAP[oid] = type
report.USEDMAP[oid] = size if report.use_dbm:
report.USEDMAP[oid] = str(size)
else:
report.USEDMAP[oid] = size
report.COIDS += 1 report.COIDS += 1
report.CBYTES += size report.CBYTES += size
report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1 report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size
else: else:
type = report.OIDMAP[oid] type = report.OIDMAP[oid]
fsize = report.USEDMAP[oid] if report.use_dbm:
report.USEDMAP[oid] = size fsize = int(report.USEDMAP[oid])
report.USEDMAP[oid] = str(size)
else:
fsize = report.USEDMAP[oid]
report.USEDMAP[oid] = size
report.FOIDS += 1 report.FOIDS += 1
report.FBYTES += fsize report.FBYTES += fsize
report.CBYTES += size - fsize report.CBYTES += size - fsize
...@@ -147,6 +167,7 @@ usage: %(program)s [options] /path/to/Data.fs ...@@ -147,6 +167,7 @@ usage: %(program)s [options] /path/to/Data.fs
Options: Options:
-h, --help this help screen -h, --help this help screen
-c, --csv output CSV -c, --csv output CSV
-d, --dbm use DBM as temporary storage to limit memory usage
""" """
def usage(stream, msg=None): def usage(stream, msg=None):
...@@ -160,16 +181,19 @@ def usage(stream, msg=None): ...@@ -160,16 +181,19 @@ def usage(stream, msg=None):
def main():
    """Command-line entry point: parse options, analyze the file, print report.

    Recognised options are -h/--help, -c/--csv and -d/--dbm; exactly one
    positional argument (the path to the Data.fs file) is required.
    Exits with status 2 on a usage error.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:],
                                   'hcd', ['help', 'csv', 'dbm'])
    except getopt.GetoptError as msg:
        usage(sys.stderr, msg)
        sys.exit(2)
    csv = False
    use_dbm = False
    # Look at the options before requiring a path so that a bare "-h"
    # shows the help text instead of a usage error.
    for opt, _value in opts:
        if opt in ('-h', '--help'):
            usage(sys.stdout)
            sys.exit()
        elif opt in ('-c', '--csv'):
            csv = True
        elif opt in ('-d', '--dbm'):
            use_dbm = True
    if not args:
        usage(sys.stderr, 'missing path to Data.fs')
        sys.exit(2)
    report(analyze(args[0], use_dbm), csv)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment