Commit b5e39d4a authored by Kirill Smelkov

X wcfs/treegen: allstructs: Do not keep all tree structures in memory

We need to sample n random ones from AllStructs. We can do that
without first keeping the whole AllStructs result in memory.

Starting from maxdepth=4 and nkeys=10 there are too many entries in the
full list. For example, the following input caused `treegen allstructs`
to be OOM-killed on my machine:

    4 2 20/8808469856591574482 0:f,1:d,2:g 0:d,1:e,2:a,3:h,4:d,5:h,6:d,7:c

Now it works ok without eating memory.
parent e5601af1
...@@ -122,6 +122,7 @@ from ZODB import DB ...@@ -122,6 +122,7 @@ from ZODB import DB
from ZODB.Connection import Connection from ZODB.Connection import Connection
from ZODB.MappingStorage import MappingStorage from ZODB.MappingStorage import MappingStorage
import transaction import transaction
import itertools
import random import random
import six import six
...@@ -292,10 +293,6 @@ def AllStructs(kv1txt, kv2txt, maxdepth, maxsplit, n, seed=None): ...@@ -292,10 +293,6 @@ def AllStructs(kv1txt, kv2txt, maxdepth, maxsplit, n, seed=None):
commit('kv2') commit('kv2')
t2struct0 = xbtree.StructureOf(ztree) t2struct0 = xbtree.StructureOf(ztree)
# all tree topologies that can represent kv1 and kv2
t1AllStructs = list(xbtree.AllStructs(kv1.keys(), maxdepth, maxsplit, kv=kv1))
t2AllStructs = list(xbtree.AllStructs(kv2.keys(), maxdepth, maxsplit, kv=kv2))
# seed # seed
if seed is None: if seed is None:
seed = time.now() seed = time.now()
...@@ -303,12 +300,16 @@ def AllStructs(kv1txt, kv2txt, maxdepth, maxsplit, n, seed=None): ...@@ -303,12 +300,16 @@ def AllStructs(kv1txt, kv2txt, maxdepth, maxsplit, n, seed=None):
random.seed(seed) random.seed(seed)
print("# maxdepth=%d maxsplit=%d n=%d seed=%d" % (maxdepth, maxsplit, n, seed)) print("# maxdepth=%d maxsplit=%d n=%d seed=%d" % (maxdepth, maxsplit, n, seed))
# make 2·n random samples from all tree topologies that can represent kv1 and kv2
t1structv = rsample(xbtree.AllStructs(kv1.keys(), maxdepth, maxsplit, kv=kv1), n)
t2structv = rsample(xbtree.AllStructs(kv2.keys(), maxdepth, maxsplit, kv=kv2), n)
# all tree1 and tree2 topologies jumps in between we are going to emit: # all tree1 and tree2 topologies jumps in between we are going to emit:
# native + n random ones. # native + n random ones.
if t1struct0 in t1AllStructs: t1AllStructs.remove(t1struct0) # avoid dups if t1struct0 in t1structv: t1structv.remove(t1struct0) # avoid dups
if t2struct0 in t2AllStructs: t2AllStructs.remove(t2struct0) if t2struct0 in t2structv: t2structv.remove(t2struct0)
t1structv = [t1struct0] + random.sample(t1AllStructs, min(n, len(t1AllStructs))) t1structv.insert(0, t1struct0)
t2structv = [t2struct0] + random.sample(t2AllStructs, min(n, len(t2AllStructs))) t2structv.insert(0, t2struct0)
# emit topologies for tree1->tree2 and tree1<-tree2 transitions for all # emit topologies for tree1->tree2 and tree1<-tree2 transitions for all
# combinations of tree1 and tree2. # combinations of tree1 and tree2.
...@@ -321,6 +322,24 @@ def AllStructs(kv1txt, kv2txt, maxdepth, maxsplit, n, seed=None): ...@@ -321,6 +322,24 @@ def AllStructs(kv1txt, kv2txt, maxdepth, maxsplit, n, seed=None):
print(zctx.TopoEncode(tstruct)) print(zctx.TopoEncode(tstruct))
# rsample returns k random samples from seq.
#
# It differs from random.sample in that it does not keep whole list(seq) in
# memory: seq is consumed through an iterator and at most k items are held at
# any time (reservoir sampling).
#
# If seq yields fewer than k items, all of them are returned in shuffled order.
# k=0 returns [] without consuming seq, similarly to random.sample(..., 0).
# ValueError is raised if k is negative.
def rsample(seq, k): # -> [] of items; len <= k
    # based on https://stackoverflow.com/a/35671225/9456786
    #          https://en.wikipedia.org/wiki/Reservoir_sampling
    if k < 0:
        raise ValueError("negative sample size")
    if k == 0:
        # nothing to pick; do not walk through potentially huge seq
        return []

    it = iter(seq)

    # fill the reservoir with the first k items; shuffle them so that the
    # result order is random even when seq has <= k items.
    sample = list(itertools.islice(it, k))
    random.shuffle(sample)

    # i is 1-based number of the item in seq. A slot j is drawn uniformly from
    # [0,i); the item enters the reservoir only if j lands inside it.
    for i, item in enumerate(it, k + 1):
        j = random.randrange(i) # [0,i)
        if j < k:
            sample[j] = item
    return sample
# bitravel2Way generates travel path through all A<->B edges such # bitravel2Way generates travel path through all A<->B edges such
# that all edges a->b and a<-b are traveled and exactly once. # that all edges a->b and a<-b are traveled and exactly once.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment