### bigarray: In-place .append()

```ca064f75 (bigarray: Support resizing in-place) added O(1) in-place
BigArray.resize(), which makes it possible for users to append data to a
BigArray in O(δ) time.

But it is easy for people to make off-by-one mistakes when calculating
indices for append.

So provide a convenient BigArray.append() which simplifies the following

A                               # ZBigArray e.g. of shape       (N, 3)
values                          # ndarray to append of shape    (δ, 3)
n, δ = len(A), len(values)      # length of A's major index  =N
A.resize((n+δ,) + A.shape[1:])  # add δ new entries ; now len(A) =N+δ
A[-δ:] = values                 # set data for last new δ entries

into

A.append(values)

/cc @klaus```
parent 605a2a90
 # ... ... @@ -36,7 +36,7 @@ of physical RAM.
# (diff context, bigarray module imports)
#   from __future__ import print_function
#   from wendelin.lib.calc import mul
#  -from numpy import ndarray, dtype, sign, newaxis
#  +from numpy import ndarray, dtype, sign, newaxis, asarray
#   import logging
#
# ... ... @@ -180,6 +180,46 @@ class BigArray(object):
# (diff context, tail of the preceding method)
#         self._init0(new_shape, self.dtype, order='C')   # FIXME order hardcoded


# append BigArray in-place
#
# NOTE
#
# - numpy.append(array, δ)  creates new array and copies array and δ there
#                           (works in O(array + δ) time)
#
# - BigArray.append(δ)      resizes array and copies δ to tail
#                           (works in O(δ) time)
#
# values - must be ndarray-like with compatible dtype of the same shape
#          as extended array, except major axis, e.g.
#
#   BigArray    (N,10,5)
#   values      (3,10,5)
#
# XXX we assume major axis is 0 (C ordering)
#
# (method of BigArray; shown dedented because the class header is outside
#  this hunk)
def append(self, values):
    """Append `values` to the tail of the array in-place.

    `values` is converted with asarray(); its shape after the major axis
    must equal self.shape[1:].

    Raises ValueError if the trailing dimensions do not match.  If the
    assignment of `values` into the freshly added tail fails, the resize is
    rolled back and the original exception is re-raised.
    """
    values = asarray(values)

    # make sure appended values, after major axis, are of the same shape
    if self.shape[1:] != values.shape[1:]:
        # NOTE the same exception as in numpy.append()
        raise ValueError('all the input array dimensions except for the '
                         'concatenation axis must match exactly')

    # lengths along the major axis (not the whole shape tuples)
    n, delta = self.shape[0], values.shape[0]

    # nothing to add; also avoids self[-0:], which would address the whole
    # array instead of an empty tail
    if delta == 0:
        return

    # resize us, and prepare to rollback, in case of e.g. dtype
    # incompatibility caught on follow-up assignment
    self.resize((n + delta,) + self.shape[1:])

    # copy values to prepared tail place, and we are done
    try:
        self[-delta:] = values
    except BaseException:
        # in case of any error - rollback the resize and re-raise
        self.resize((n,) + self.shape[1:])
        raise


# ~~~ get/set item/slice connect bigfile blocks to ndarray in RAM.
# only basic indexing is supported - see numpy/.../arrays.indexing.rst
# ... ...
 ... ... @@ -21,7 +21,7 @@ from wendelin.lib.zodb import dbclose from wendelin.lib.testing import getTestDB from persistent import UPTODATE import transaction from numpy import dtype, uint8, all, array_equal from numpy import dtype, uint8, all, array_equal, arange testdb = None def setup_module(): ... ... @@ -148,6 +148,9 @@ def test_zbigarray(): b[16*1024*1024] = 100 b[-1] = 255 A.append(arange(10, 14, dtype=uint8)) # commit; reload & verify changes transaction.commit() dbclose(root) ... ... @@ -158,7 +161,7 @@ def test_zbigarray(): A = root['zarray'] assert isinstance(A, ZBigArray) assert A.shape == (24*1024*1024,) assert A.shape == (24*1024*1024 + 4,) assert A.dtype == dtype(uint8) a = A[:] ... ... @@ -170,3 +173,8 @@ def test_zbigarray(): assert a[16*1024*1024] == 100 assert a[24*1024*1024-1] == 255 assert a[24*1024*1024+0] == 10 assert a[24*1024*1024+1] == 11 assert a[24*1024*1024+2] == 12 assert a[24*1024*1024+3] == 13
 ... ... @@ -20,8 +20,9 @@ from wendelin.bigarray import BigArray from wendelin.bigfile import BigFile from wendelin.lib.mem import memcpy from wendelin.lib.calc import mul from numpy import ndarray, dtype, int32, uint32, uint8, all, zeros, arange, \ from numpy import ndarray, dtype, int64, int32, uint32, uint8, all, zeros, arange, \ array_equal, asarray import numpy from pytest import raises ... ... @@ -415,6 +416,47 @@ def test_bigarray_resize(): assert array_equal(c.ravel(), arange(11*3, dtype=uint32)) # ~ arange(n*3*2).reshape(n,3,2) def arange32(start, stop, dtype=None): return arange(start*3*2, stop*3*2, dtype=dtype).reshape((stop-start),3,2) def test_bigarray_append(): data = zeros(8*PS, dtype=uint32) f = BigFile_Data(data, PS) fh = f.fileh_open() # first make sure arange32 works correctly x = numpy.append( arange32(0,4), arange32(4,7), axis=0 ) assert array_equal(x, arange32(0,7)) assert array_equal(x.ravel(), arange(7*3*2)) # init empty BigArray of shape (x,3,2) A = BigArray((0,3,2), int64, fh) assert array_equal(A[:], arange32(0,0)) # append initial data A.append(arange32(0,2)) assert array_equal(A[:], arange32(0,2)) A.append(arange32(2,3)) assert array_equal(A[:], arange32(0,3)) # append plain list (test for arg conversion) A.append([[[18,19], [20,21], [22,23]]]) assert array_equal(A[:], arange32(0,4)) # append with incorrect shape - rejected, original stays the same assert raises(ValueError, 'A.append(arange(3))') assert array_equal(A[:], arange32(0,4)) assert raises(ValueError, 'A.append(arange(3*2).reshape(3,2))') assert array_equal(A[:], arange32(0,4)) # append with correct shape, but incompatible dtype - rejected, original stays the same assert raises(ValueError, 'A.append(asarray([[[0,1], [2,3], [4,"abc"]]], dtype=object))') assert array_equal(A[:], arange32(0,4)) def test_bigarray_list(): Z = BigFile_Zero(PS) Zh = Z.fileh_open() ... ...
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment