Commit 1245acc9 authored by Kirill Smelkov

bigarray: In-place .append()

ca064f75 (bigarray: Support resizing in-place) added O(1) in-place
BigArray.resize() which makes it possible for users to append data to
BigArray in O(δ) time.

But it is easy for people to make off-by-one mistakes when calculating
indices for append.

So provide a convenient BigArray.append() which simplifies the following

    A                               # ZBigArray e.g. of shape       (N, 3)
    values                          # ndarray to append of shape    (δ, 3)
    n, δ = len(A), len(values)      # length of A's major index  =N
    A.resize((n+δ,) + A.shape[1:])  # add δ new entries ; now len(A) =N+δ
    A[-δ:] = values                 # set data for last new δ entries

into

    A.append(values)

/cc @klaus
parent 605a2a90
......@@ -36,7 +36,7 @@ of physical RAM.
from __future__ import print_function
from wendelin.lib.calc import mul
from numpy import ndarray, dtype, sign, newaxis
from numpy import ndarray, dtype, sign, newaxis, asarray
import logging
......@@ -180,6 +180,46 @@ class BigArray(object):
self._init0(new_shape, self.dtype, order='C') # FIXME order hardcoded
# append BigArray in-place
#
# NOTE
#
# - numpy.append(array, δ) creates new array and copies array and δ there
# (works in O(array + δ) time)
#
# - BigArray.append(δ) resizes array and copies δ to tail
# (works in O(δ) time)
#
# values - must be ndarray-like with compatible dtype of the same shape
# as extended array, except major axis, e.g.
#
# BigArray (N,10,5)
# values (3,10,5)
#
# XXX we assume major axis is 0 (C ordering)
def append(self, values):
    """Append `values` to the tail of this array in-place, along axis 0.

    `values` is converted with asarray() and must have the same shape as
    this array on every axis except the major (first) one, e.g.

        BigArray    (N,10,5)
        values      (3,10,5)

    The array is resized to make room for the new entries and `values` is
    then assigned to the newly added tail. If that assignment fails (e.g.
    because of dtype incompatibility) the resize is rolled back, so the
    array keeps its original shape, and the error is re-raised.

    Appending zero entries is a no-op.

    Raises ValueError if the shapes after the major axis do not match.
    """
    values = asarray(values)

    # make sure appended values, after major axis, are of the same shape
    if self.shape[1:] != values.shape[1:]:
        # NOTE the same exception as in numpy.append()
        raise ValueError('all the input array dimensions except for the '
                         'concatenation axis must match exactly')

    n, delta = self.shape[0], values.shape[0]

    # nothing to append. This must be handled separately: with delta=0 the
    # slice [-delta:] is [0:], i.e. the whole array and not an empty tail.
    if delta == 0:
        return

    # resize us, and prepare to rollback, in case of e.g. dtype
    # incompatibility caught on follow-up assignment
    self.resize((n + delta,) + self.shape[1:])

    # copy values to prepared tail place, and we are done
    try:
        self[-delta:] = values
    except:
        # deliberately bare: whatever the error is - rollback the resize
        # and re-raise it unchanged
        self.resize((n,) + self.shape[1:])
        raise
# ~~~ get/set item/slice connect bigfile blocks to ndarray in RAM.
# only basic indexing is supported - see numpy/.../arrays.indexing.rst
......
......@@ -21,7 +21,7 @@ from wendelin.lib.zodb import dbclose
from wendelin.lib.testing import getTestDB
from persistent import UPTODATE
import transaction
from numpy import dtype, uint8, all, array_equal
from numpy import dtype, uint8, all, array_equal, arange
testdb = None
def setup_module():
......@@ -148,6 +148,9 @@ def test_zbigarray():
b[16*1024*1024] = 100
b[-1] = 255
A.append(arange(10, 14, dtype=uint8))
# commit; reload & verify changes
transaction.commit()
dbclose(root)
......@@ -158,7 +161,7 @@ def test_zbigarray():
A = root['zarray']
assert isinstance(A, ZBigArray)
assert A.shape == (24*1024*1024,)
assert A.shape == (24*1024*1024 + 4,)
assert A.dtype == dtype(uint8)
a = A[:]
......@@ -170,3 +173,8 @@ def test_zbigarray():
assert a[16*1024*1024] == 100
assert a[24*1024*1024-1] == 255
assert a[24*1024*1024+0] == 10
assert a[24*1024*1024+1] == 11
assert a[24*1024*1024+2] == 12
assert a[24*1024*1024+3] == 13
......@@ -20,8 +20,9 @@ from wendelin.bigarray import BigArray
from wendelin.bigfile import BigFile
from wendelin.lib.mem import memcpy
from wendelin.lib.calc import mul
from numpy import ndarray, dtype, int32, uint32, uint8, all, zeros, arange, \
from numpy import ndarray, dtype, int64, int32, uint32, uint8, all, zeros, arange, \
array_equal, asarray
import numpy
from pytest import raises
......@@ -415,6 +416,47 @@ def test_bigarray_resize():
assert array_equal(c.ravel(), arange(11*3, dtype=uint32))
# ~ arange(n*3*2).reshape(n,3,2)
def arange32(start, stop, dtype=None):
    """Return rows [start, stop) of arange(n*3*2).reshape(n,3,2).

    The result has shape (stop-start, 3, 2) and holds consecutive integers
    beginning at start*3*2; `dtype` is passed through to arange().
    """
    nrows = stop - start
    flat = arange(start * 3 * 2, stop * 3 * 2, dtype=dtype)
    return flat.reshape(nrows, 3, 2)
def test_bigarray_append():
    """Check BigArray.append(): growth, argument conversion and rollback.

    Verifies that appended data lands at the tail, that plain lists are
    accepted, and that a rejected append (wrong shape or incompatible
    dtype) leaves the array unchanged.
    """
    data = zeros(8*PS, dtype=uint32)
    f = BigFile_Data(data, PS)
    fh = f.fileh_open()

    # first make sure arange32 works correctly
    x = numpy.append(arange32(0,4), arange32(4,7), axis=0)
    assert array_equal(x, arange32(0,7))
    assert array_equal(x.ravel(), arange(7*3*2))

    # init empty BigArray of shape (x,3,2)
    A = BigArray((0,3,2), int64, fh)
    assert array_equal(A[:], arange32(0,0))

    # append initial data
    A.append(arange32(0,2))
    assert array_equal(A[:], arange32(0,2))
    A.append(arange32(2,3))
    assert array_equal(A[:], arange32(0,3))

    # append plain list (test for arg conversion)
    A.append([[[18,19], [20,21], [22,23]]])
    assert array_equal(A[:], arange32(0,4))

    # append with incorrect shape - rejected, original stays the same
    # NOTE raises() is used as context manager: the form taking a code
    #      string is not supported by newer pytest versions.
    with raises(ValueError):
        A.append(arange(3))
    assert array_equal(A[:], arange32(0,4))
    with raises(ValueError):
        A.append(arange(3*2).reshape(3,2))
    assert array_equal(A[:], arange32(0,4))

    # append with correct shape, but incompatible dtype - rejected, original stays the same
    with raises(ValueError):
        A.append(asarray([[[0,1], [2,3], [4,"abc"]]], dtype=object))
    assert array_equal(A[:], arange32(0,4))
def test_bigarray_list():
Z = BigFile_Zero(PS)
Zh = Z.fileh_open()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment