Commit 1ee72371 authored by Kirill Smelkov

Demo program that shows how to work with ZBigArrays bigger than RAM in size

This shows how to first generate such arrays (in steps, as every
transaction change should fit in memory), and then gather data from
whole array using C/Fortran/etc code.

It shows how to compute mean via NumPy's ndarray.mean()

It also shows that e.g. ndarray.var() wants to create temporaries the
size of the original ndarray, and that this fails because they do not
fit into RAM.

ndarray.var() should not need to create such temporaries in principle -
all it has to do is to first compute mean, and then compute

    sum (Xi - <X>)^2

in a loop.

<X> is a scalar, and Xi is just an access to the original array.


So this also shows that NumPy can be incrementally improved to avoid
creating such temporaries, and then it will work.
parent bb9d8bf1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Demo program that generates and computes on ZBigArray bigger than RAM

gen:    generate signal
read:   read generated signal and compute its mean/var/sum

TODO text
"""
from __future__ import print_function
from wendelin.bigarray.array_zodb import ZBigArray
from ZODB.FileStorage import FileStorage
from ZODB import DB
import transaction
from numpy import float64, dtype, cumsum, sin
import psutil
import sys
# Binary size units, expressed in bytes (each is 1024 times the previous one).
KB = 1 << 10
MB = KB << 10
GB = MB << 10
def read(signalv):
    """Read the signal and print its mean and sum.

    signalv - BigArray with signal.

    First prints the array's shape, dtype and size in GB, then takes the
    whole array as one ndarray via ``signalv[:]`` and prints the results of
    ndarray.mean() and ndarray.sum().
    """
    shape_str = 'x'.join('%s' % dim for dim in signalv.shape)
    size_gb = float(signalv.nbytes) / GB
    print('sig: %s %s (= %.2fGB)' % (shape_str, signalv.dtype, size_gb))

    a = signalv[:]  # BigArray -> ndarray

    print('<sig>:\t%s' % a.mean())
    # ndarray.var() is deliberately not called here:
    # XXX it wants to produce temps (var = S (a - <a>)^2)
    print('S(sig):\t%s' % a.sum())
# generate signal S(t) = M⋅sin(f⋅t)
f = 0.2     # factor applied to t inside sin()
M = 15      # factor applied to sin(f⋅t)


def gen(signalv):
    """Fill `signalv` in place with S(t) = M⋅sin(f⋅t), where t is the index.

    signalv - BigArray to write the signal into.

    The array is written block by block (32MB worth of items per block) and
    the current transaction is committed after every block, so that the
    changes made by a single transaction stay small enough to fit in memory.
    """
    print('gen signal t=0...%.2e %s (= %.2fGB) ' % \
            (len(signalv), signalv.dtype, float(signalv.nbytes) / GB))
    a = signalv[:]  # BigArray -> ndarray
    blocksize = 32*MB//a.itemsize   # will write out signal in such blocks

    # range() instead of xrange(): works on both Python 2 and Python 3, and
    # the number of blocks is small enough that a Python 2 list is harmless.
    for t0 in range(0, len(a), blocksize):
        ablk = a[t0:t0+blocksize]

        # everything below works in place on ablk - no block-sized temporaries
        ablk[:] = 1             # at = 1
        cumsum(ablk, out=ablk)  # at = t-t0+1
        ablk += (t0-1)          # at = t
        ablk *= f               # at = f⋅t
        sin(ablk, out=ablk)     # at = sin(f⋅t)
        ablk *= M               # at = M⋅sin(f⋅t)

        note = 'gen signal blk [%s:%s] (%.1f%%)' % (t0, t0+len(ablk), 100. * (t0+len(ablk)) / len(a))

        # Commit this block before touching the next one. Previously `note`
        # and `txn` were prepared but never used, so nothing was committed.
        txn = transaction.get()
        txn.note(note)
        txn.commit()
def usage():
    """Print a one-line command-line synopsis to stderr.

    The program name is taken from sys.argv[0].
    """
    print("Usage: %s (gen|read) <db>" % sys.argv[0], file=sys.stderr)
# Command-line entry point.
#
# Reads the action ('gen' or 'read') and the database path from sys.argv,
# prints the total amount of RAM, opens a ZODB FileStorage/DB at that path
# and finally prints the virtual and resident memory size of this process.
#
# NOTE(review): this block appears to have lost both its indentation and
# several statements; each visible gap is marked below. Restore it from the
# original file before running.
def main():
# NOTE(review): a `try:` seems to be missing here - the `except` below has
# nothing to attach to.
act = sys.argv[1]
dbpath = sys.argv[2]
except IndexError:
# NOTE(review): no handler body is visible - presumably it reports usage()
# and exits; confirm.
if act not in ('gen', 'read'):
# NOTE(review): no body is visible for this check either - presumably the
# same usage()/exit handling; confirm.
# total physical memory, used below to size the generated signal
ram_nbytes = psutil.virtual_memory().total
print('I: RAM: %.2fGB' % (float(ram_nbytes) / GB))
stor = FileStorage(dbpath)
db = DB(stor)
# NOTE(review): the right-hand side is missing - presumably a connection
# opened from `db`; confirm.
conn =
root = conn.root()
if act == 'gen':
# float64 signal holding twice as many bytes as there is RAM
sig_dtype = dtype(float64)
sig_len = (2*ram_nbytes) // sig_dtype.itemsize
sig = ZBigArray((sig_len,), sig_dtype)
root['signalv'] = sig
# ZBigArray requirement: before we can compute it (with subobject
# .zfile) have to be made explicitly known to connection or current
# transaction committed
# NOTE(review): neither the commit described above nor a call to gen() is
# visible in this branch; confirm.
elif act == 'read':
# NOTE(review): no call to read() is visible in this branch; confirm.
import os
# report memory usage of the current process
p = psutil.Process(os.getpid())
m = p.memory_info()
print('VIRT: %i MB\tRSS: %iMB' % (m.vms//MB, m.rss//MB))
# NOTE(review): no close of conn/db/stor is visible; confirm whether the
# original closes them.
if __name__ == '__main__':
# NOTE(review): the guarded statement is missing - presumably main(); confirm.
......@@ -191,6 +191,8 @@ setup(
'six', # compat py2/py3
'psutil', # demo_zbigarray
extras_require = {
......@@ -203,4 +205,10 @@ setup(
'test': viamake('test', 'run tests'),
'bench': viamake('bench', 'run benchmarks'),
entry_points= {'console_scripts': [
# demo to test
'demo-zbigarray = wendelin.demo.demo_zbigarray:main',
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment