Commit 1245acc9 authored by Kirill Smelkov's avatar Kirill Smelkov

bigarray: In-place .append()

ca064f75 (bigarray: Support resizing in-place) added O(1) in-place
BigArray.resize(), which makes it possible for users to append data to a
BigArray in O(δ) time.

But it is easy for people to make off-by-one mistakes when calculating
indices for append.

So provide a convenient BigArray.append() which simplifies the following

    A                               # ZBigArray e.g. of shape       (N, 3)
    values                          # ndarray to append of shape    (δ, 3)
    n, δ = len(A), len(values)      # length of A's major index  =N
    A.resize((n+δ,) + A.shape[1:])  # add δ new entries ; now len(A) =N+δ
    A[-δ:] = values                 # set data for last new δ entries

into

    A.append(values)

/cc @klaus
parent 605a2a90
...@@ -36,7 +36,7 @@ of physical RAM. ...@@ -36,7 +36,7 @@ of physical RAM.
from __future__ import print_function from __future__ import print_function
from wendelin.lib.calc import mul from wendelin.lib.calc import mul
from numpy import ndarray, dtype, sign, newaxis from numpy import ndarray, dtype, sign, newaxis, asarray
import logging import logging
...@@ -180,6 +180,46 @@ class BigArray(object): ...@@ -180,6 +180,46 @@ class BigArray(object):
self._init0(new_shape, self.dtype, order='C') # FIXME order hardcoded self._init0(new_shape, self.dtype, order='C') # FIXME order hardcoded
# append BigArray in-place
#
# NOTE
#
# - numpy.append(array, δ) creates new array and copies array and δ there
# (works in O(array + δ) time)
#
# - BigArray.append(δ) resizes array and copies δ to tail
# (works in O(δ) time)
#
# values - must be ndarray-like with compatible dtype of the same shape
# as extended array, except major axis, e.g.
#
# BigArray (N,10,5)
# values (3,10,5)
#
# XXX we assume major axis is 0 (C ordering)
def append(self, values):
    """Append `values` to the tail of the array along the major axis, in-place.

    values - anything convertible with numpy.asarray(); its shape must be
             the same as the array's shape except for the major (0th) axis.

    Raises ValueError if `values` is 0-dimensional or its trailing
    dimensions do not match. If storing the data fails, the resize is
    rolled back and the original exception is re-raised.
    """
    values = asarray(values)

    # make sure appended values, after major axis, are of the same shape.
    # 0-d input has no major axis at all - reject it here too, instead of
    # failing below with IndexError on values.shape[0].
    if values.ndim == 0 or self.shape[1:] != values.shape[1:]:
        # NOTE the same exception as in numpy.append()
        raise ValueError('all the input array dimensions except for the '
                         'concatenation axis must match exactly')

    # resize us, and prepare to rollback, in case of e.g. dtype
    # incompatibility caught on follow-up assignment
    n, delta = self.shape[0], values.shape[0]
    self.resize((n + delta,) + self.shape[1:])

    # copy values to prepared tail place, and we are done
    try:
        # NOTE the tail is addressed as [n:], not [-delta:] - for delta=0
        # the latter degenerates to [0:], i.e. the whole array, and the
        # assignment of empty values then fails to broadcast.
        self[n:] = values
    except:
        # in case of any error (bare except is deliberate - we always
        # re-raise) - rollback the resize
        self.resize((n,) + self.shape[1:])
        raise
# ~~~ get/set item/slice connect bigfile blocks to ndarray in RAM. # ~~~ get/set item/slice connect bigfile blocks to ndarray in RAM.
# only basic indexing is supported - see numpy/.../arrays.indexing.rst # only basic indexing is supported - see numpy/.../arrays.indexing.rst
......
...@@ -21,7 +21,7 @@ from wendelin.lib.zodb import dbclose ...@@ -21,7 +21,7 @@ from wendelin.lib.zodb import dbclose
from wendelin.lib.testing import getTestDB from wendelin.lib.testing import getTestDB
from persistent import UPTODATE from persistent import UPTODATE
import transaction import transaction
from numpy import dtype, uint8, all, array_equal from numpy import dtype, uint8, all, array_equal, arange
testdb = None testdb = None
def setup_module(): def setup_module():
...@@ -148,6 +148,9 @@ def test_zbigarray(): ...@@ -148,6 +148,9 @@ def test_zbigarray():
b[16*1024*1024] = 100 b[16*1024*1024] = 100
b[-1] = 255 b[-1] = 255
A.append(arange(10, 14, dtype=uint8))
# commit; reload & verify changes # commit; reload & verify changes
transaction.commit() transaction.commit()
dbclose(root) dbclose(root)
...@@ -158,7 +161,7 @@ def test_zbigarray(): ...@@ -158,7 +161,7 @@ def test_zbigarray():
A = root['zarray'] A = root['zarray']
assert isinstance(A, ZBigArray) assert isinstance(A, ZBigArray)
assert A.shape == (24*1024*1024,) assert A.shape == (24*1024*1024 + 4,)
assert A.dtype == dtype(uint8) assert A.dtype == dtype(uint8)
a = A[:] a = A[:]
...@@ -170,3 +173,8 @@ def test_zbigarray(): ...@@ -170,3 +173,8 @@ def test_zbigarray():
assert a[16*1024*1024] == 100 assert a[16*1024*1024] == 100
assert a[24*1024*1024-1] == 255 assert a[24*1024*1024-1] == 255
assert a[24*1024*1024+0] == 10
assert a[24*1024*1024+1] == 11
assert a[24*1024*1024+2] == 12
assert a[24*1024*1024+3] == 13
...@@ -20,8 +20,9 @@ from wendelin.bigarray import BigArray ...@@ -20,8 +20,9 @@ from wendelin.bigarray import BigArray
from wendelin.bigfile import BigFile from wendelin.bigfile import BigFile
from wendelin.lib.mem import memcpy from wendelin.lib.mem import memcpy
from wendelin.lib.calc import mul from wendelin.lib.calc import mul
from numpy import ndarray, dtype, int32, uint32, uint8, all, zeros, arange, \ from numpy import ndarray, dtype, int64, int32, uint32, uint8, all, zeros, arange, \
array_equal, asarray array_equal, asarray
import numpy
from pytest import raises from pytest import raises
...@@ -415,6 +416,47 @@ def test_bigarray_resize(): ...@@ -415,6 +416,47 @@ def test_bigarray_resize():
assert array_equal(c.ravel(), arange(11*3, dtype=uint32)) assert array_equal(c.ravel(), arange(11*3, dtype=uint32))
# ~ arange(n*3*2).reshape(n,3,2)
def arange32(start, stop, dtype=None):
    """Return rows [start, stop) of arange(n*3*2).reshape(n,3,2)."""
    nrows = stop - start
    flat = arange(start * 3 * 2, stop * 3 * 2, dtype=dtype)
    return flat.reshape(nrows, 3, 2)
def test_bigarray_append():
    """Check BigArray.append(): growth, argument conversion and rollback on error."""
    data = zeros(8*PS, dtype=uint32)
    f = BigFile_Data(data, PS)
    fh = f.fileh_open()

    # first make sure arange32 works correctly
    x = numpy.append(arange32(0,4), arange32(4,7), axis=0)
    assert array_equal(x, arange32(0,7))
    assert array_equal(x.ravel(), arange(7*3*2))

    # init empty BigArray of shape (x,3,2)
    A = BigArray((0,3,2), int64, fh)
    assert array_equal(A[:], arange32(0,0))

    # append initial data
    A.append(arange32(0,2))
    assert array_equal(A[:], arange32(0,2))
    A.append(arange32(2,3))
    assert array_equal(A[:], arange32(0,3))

    # append plain list (test for arg conversion)
    A.append([[[18,19], [20,21], [22,23]]])
    assert array_equal(A[:], arange32(0,4))

    # NOTE raises() is used as a context manager: the string form
    # raises(Exc, 'code') is not supported by modern pytest.

    # append with incorrect shape - rejected, original stays the same
    with raises(ValueError):
        A.append(arange(3))
    assert array_equal(A[:], arange32(0,4))
    with raises(ValueError):
        A.append(arange(3*2).reshape(3,2))
    assert array_equal(A[:], arange32(0,4))

    # append with correct shape, but incompatible dtype - rejected, original stays the same
    with raises(ValueError):
        A.append(asarray([[[0,1], [2,3], [4,"abc"]]], dtype=object))
    assert array_equal(A[:], arange32(0,4))
def test_bigarray_list(): def test_bigarray_list():
Z = BigFile_Zero(PS) Z = BigFile_Zero(PS)
Zh = Z.fileh_open() Zh = Z.fileh_open()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment