bigarray: In-place .append()

ca064f75 (bigarray: Support resizing in-place) added O(1) in-place BigArray.resize(), which makes it possible for users to append data to a BigArray in O(δ) time.

But it is easy for people to make off-by-one mistakes when calculating indices for append.

So provide a convenient BigArray.append() which simplifies the following

    A                                   # ZBigArray e.g. of shape       (N, 3)
    values                              # ndarray to append of shape    (δ, 3)
    n, δ = len(A), len(values)          # length of A's major index     =N
    A.resize((n+δ,) + A.shape[1:])      # add δ new entries ; now len(A) =N+δ
    A[-δ:] = values                     # set data for last new δ entries

into

    A.append(values)

/cc @klaus

bigarray: In-place .append()
ca064f75 (bigarray: Support resizing in-place) added O(1) in-place BigArray.resize(), which makes it possible for users to append data to a BigArray in O(δ) time.

But it is easy for people to make off-by-one mistakes when calculating indices for append.

So provide a convenient BigArray.append() which simplifies the following

    A                                   # ZBigArray e.g. of shape       (N, 3)
    values                              # ndarray to append of shape    (δ, 3)
    n, δ = len(A), len(values)          # length of A's major index     =N
    A.resize((n+δ,) + A.shape[1:])      # add δ new entries ; now len(A) =N+δ
    A[-δ:] = values                     # set data for last new δ entries

into

    A.append(values)

/cc @klaus
1245acc9 · Kirill Smelkov · 605a2a90 · 1245acc9 · 1245acc9 · 1245acc9
Commit 1245acc9 authored Jul 24, 2015 by Kirill Smelkov
Showing with 94 additions and 4 deletions

bigarray/__init__.py bigarray/__init__.py +41 -1

bigarray/tests/test_arrayzodb.py bigarray/tests/test_arrayzodb.py +10 -2

bigarray/tests/test_basic.py bigarray/tests/test_basic.py +43 -1

No files found.
--- a/bigarray/__init__.py
+++ b/bigarray/__init__.py
@@ -36,7 +36,7 @@ of physical RAM.

 from __future__ import print_function
 from wendelin.lib.calc import mul
-from numpy import ndarray, dtype, sign, newaxis
+from numpy import ndarray, dtype, sign, newaxis, asarray
 import logging


@@ -180,6 +180,46 @@ class BigArray(object):
        self._init0(new_shape, self.dtype, order='C')   # FIXME order hardcoded


+    # append BigArray in-place
+    #
+    # NOTE
+    #
+    # - numpy.append(array, δ)  creates new array and copies array and δ there
+    #                           (works in O(array + δ) time)
+    #
+    # - BigArray.append(δ)      resizes array and copies δ to tail
+    #                           (works in O(δ) time)
+    #
+    # values    - must be ndarray-like with compatible dtype of the same shape
+    #             as extended array, except major axis, e.g.
+    #
+    #   BigArray    (N,10,5)
+    #   values      (3,10,5)
+    #
+    # XXX we assume major axis is 0 (C ordering)
+    def append(self, values):
+        # accept anything asarray() can convert (ndarray, nested lists, ...)
+        values = asarray(values)

+        # make sure appended values, after major axis, are of the same shape
+        if self.shape[1:] != values.shape[1:]:
+            # NOTE the same exception as in numpy.append()
+            raise ValueError('all the input array dimensions except for the '
+                    'concatenation axis must match exactly')

+        n, delta = self.shape[0], values.shape[0]

+        # nothing to append - we are done.
+        #
+        # NOTE delta=0 has to be special-cased: [-0:] is the same as [0:],
+        # so the tail assignment below would target all existing rows
+        # instead of an empty tail.
+        if delta == 0:
+            return

+        # resize us, and prepare to rollback, in case of e.g. dtype
+        # incompatibility caught on follow-up assignment
+        self.resize( (n+delta,) + self.shape[1:] )

+        # copy values to prepared tail place, and we are done
+        try:
+            self[-delta:] = values
+        except:
+            # in case of error - rollback the resize and re-raise
+            #
+            # NOTE bare except is intentional here: whatever went wrong, the
+            # array must not be left enlarged, and the error is always
+            # propagated to the caller unchanged.
+            self.resize( (n,) + self.shape[1:] )
+            raise
+
+

    # ~~~ get/set item/slice connect bigfile blocks to ndarray in RAM.
    #     only basic indexing is supported - see numpy/.../arrays.indexing.rst

--- a/bigarray/tests/test_arrayzodb.py
+++ b/bigarray/tests/test_arrayzodb.py
@@ -21,7 +21,7 @@ from wendelin.lib.zodb import dbclose
 from wendelin.lib.testing import getTestDB
 from persistent import UPTODATE
 import transaction
-from numpy import dtype, uint8, all, array_equal
+from numpy import dtype, uint8, all, array_equal, arange

 testdb = None
 def setup_module():
@@ -148,6 +148,9 @@ def test_zbigarray():
    b[16*1024*1024] = 100
    b[-1]           = 255

+    A.append(arange(10, 14, dtype=uint8))
+
+
    # commit; reload & verify changes
    transaction.commit()
    dbclose(root)
@@ -158,7 +161,7 @@ def test_zbigarray():
    A = root['zarray']

    assert isinstance(A, ZBigArray)
-    assert A.shape  == (24*1024*1024,)
+    assert A.shape  == (24*1024*1024 + 4,)
    assert A.dtype  == dtype(uint8)

    a = A[:]
@@ -170,3 +173,8 @@ def test_zbigarray():

    assert a[16*1024*1024]   == 100
    assert a[24*1024*1024-1] == 255
+
+    assert a[24*1024*1024+0] ==  10
+    assert a[24*1024*1024+1] ==  11
+    assert a[24*1024*1024+2] ==  12
+    assert a[24*1024*1024+3] ==  13
--- a/bigarray/tests/test_basic.py
+++ b/bigarray/tests/test_basic.py
@@ -20,8 +20,9 @@ from wendelin.bigarray import BigArray
 from wendelin.bigfile import BigFile
 from wendelin.lib.mem import memcpy
 from wendelin.lib.calc import mul
-from numpy import ndarray, dtype, int32, uint32, uint8, all, zeros, arange, \
+from numpy import ndarray, dtype, int64, int32, uint32, uint8, all, zeros, arange, \
        array_equal, asarray
+import numpy

 from pytest import raises

@@ -415,6 +416,47 @@ def test_bigarray_resize():
    assert array_equal(c.ravel(), arange(11*3, dtype=uint32))


+# ~ arange(n*3*2).reshape(n,3,2)
+def arange32(start, stop, dtype=None):
+    # rows [start, stop) of an (n,3,2) array filled with consecutive integers;
+    # every row along the major axis holds 3*2 of them
+    rowlen = 3*2
+    flat   = arange(start*rowlen, stop*rowlen, dtype=dtype)
+    return flat.reshape(stop - start, 3, 2)
+
+def test_bigarray_append():
+    # file handle the array under test is created on
+    data = zeros(8*PS, dtype=uint32)
+    f   = BigFile_Data(data, PS)
+    fh  = f.fileh_open()

+    # first make sure arange32 works correctly
+    x = numpy.append( arange32(0,4), arange32(4,7), axis=0 )
+    assert array_equal(x, arange32(0,7))
+    assert array_equal(x.ravel(), arange(7*3*2))

+    # init empty BigArray of shape (x,3,2)
+    A   = BigArray((0,3,2), int64, fh)
+    assert array_equal(A[:], arange32(0,0))

+    # append initial data
+    A.append(arange32(0,2))
+    assert array_equal(A[:], arange32(0,2))
+    A.append(arange32(2,3))
+    assert array_equal(A[:], arange32(0,3))

+    # append plain list (test for arg conversion)
+    A.append([[[18,19], [20,21], [22,23]]])
+    assert array_equal(A[:], arange32(0,4))

+    # NOTE raises() is used in its callable form below - the string form
+    # (code passed as text) is not supported by newer pytest versions.

+    # append with incorrect shape - rejected, original stays the same
+    assert raises(ValueError, A.append, arange(3))
+    assert array_equal(A[:], arange32(0,4))
+    assert raises(ValueError, A.append, arange(3*2).reshape(3,2))
+    assert array_equal(A[:], arange32(0,4))

+    # append with correct shape, but incompatible dtype - rejected, original stays the same
+    assert raises(ValueError, A.append,
+                  asarray([[[0,1], [2,3], [4,"abc"]]], dtype=object))
+    assert array_equal(A[:], arange32(0,4))
+
+
+
+
 def test_bigarray_list():
    Z  = BigFile_Zero(PS)
    Zh = Z.fileh_open()