bigarray: Fix __getitem__ for cases where element overlaps with edge between pages

When we serve an indexing request, we first compute the page range in the backing file which contains the result, based on the major index range, then mmap that file range and pick up the result from there. The page range math was however not correct: e.g. for positive strides, the last byte of the last element is (byte0_stop-1), NOT (byte0_stop - byte0_stride), which, for cases where byte0_stop is just a bit after a page boundary, can make a difference - page0_max will be 1 page less than what it should be and then the whole ndarray view creation breaks: ... Module wendelin.bigarray, line 381, in __getitem__ view0 = ndarray(view0_shape, self._dtype, vma0, view0_offset, view0_stridev) ValueError: strides is incompatible with shape of requested array and size of buffer ( because vma0 was created smaller in size than what is needed to create a view0_shape-shaped array starting from view0_offset in vma0. ) Similar story for the negative strides math - it was not correct either. Fix it. /reported-by @Camata

bigarray: Fix __getitem__ for cases where element overlaps with edge between pages
When we serve an indexing request, we first compute the page range in the backing file which contains the result, based on the major index range, then mmap that file range and pick up the result from there. The page range math was however not correct: e.g. for positive strides, the last byte of the last element is (byte0_stop-1), NOT (byte0_stop - byte0_stride), which, for cases where byte0_stop is just a bit after a page boundary, can make a difference - page0_max will be 1 page less than what it should be and then the whole ndarray view creation breaks: ... Module wendelin.bigarray, line 381, in __getitem__ view0 = ndarray(view0_shape, self._dtype, vma0, view0_offset, view0_stridev) ValueError: strides is incompatible with shape of requested array and size of buffer ( because vma0 was created smaller in size than what is needed to create a view0_shape-shaped array starting from view0_offset in vma0. ) Similar story for the negative strides math - it was not correct either. Fix it. /reported-by @Camata
e5b7c31b · Kirill Smelkov · 386ae339 · e5b7c31b · e5b7c31b
Commit e5b7c31b authored Sep 21, 2015 by Kirill Smelkov
Hide whitespace changes
Inline Side-by-side

Showing with 56 additions and 2 deletions

bigarray/__init__.py bigarray/__init__.py +9 -2

bigarray/tests/test_basic.py bigarray/tests/test_basic.py +47 -0

No files found.
--- a/bigarray/__init__.py
+++ b/bigarray/__init__.py
@@ -361,10 +361,16 @@ class BigArray(object):
            byte0_start  = idx0_start  * stride0
            byte0_stop   = idx0_stop   * stride0
            byte0_stride = idx0_stride * stride0
+            #print('byte0:\t[%s:%s:%s]' % (byte0_start, byte0_stop, byte0_stride))

            # major slice -> in file pages, always increasing, inclusive
-            page0_min  = min(byte0_start, byte0_stop+byte0_stride) // pagesize # TODO -> fileh.pagesize
-            page0_max  = max(byte0_stop-byte0_stride, byte0_start) // pagesize # TODO -> fileh.pagesize
+            if byte0_stride >= 0:
+                page0_min = byte0_start     // pagesize                 # TODO -> fileh.pagesize
+                page0_max = (byte0_stop-1)  // pagesize                 # TODO -> fileh.pagesize
+            else:
+                page0_min = (byte0_stop  - byte0_stride)     // pagesize# TODO -> fileh.pagesize
+                page0_max = (byte0_start - byte0_stride - 1) // pagesize# TODO -> fileh.pagesize
+            #print('page0:\t[%s, %s]' % (page0_min, page0_max))


            # ~~~ mmap file part corresponding to full major slice into memory
@@ -376,6 +382,7 @@ class BigArray(object):
            view0_offset  = byte0_start - page0_min * pagesize # TODO -> fileh.pagesize
            view0_stridev = (byte0_stride,) + self._stridev[1:]
            #print('view0_shape:\t', view0_shape, self.shape)
+            #print('view0_stridv:\t', view0_stridev)
            #print('view0_offset:\t', view0_offset)
            #print('len(vma0):\t', len(vma0))
            view0 = ndarray(view0_shape, self._dtype, vma0, view0_offset, view0_stridev)

--- a/bigarray/tests/test_basic.py
+++ b/bigarray/tests/test_basic.py
@@ -99,6 +99,14 @@ class DoubleGet:
        return self.obj1[key], self.obj2[key]


+# DoubleCheck(A1, A2)[key] -> assert array_equal(A1[key], A2[key])
+class DoubleCheck(DoubleGet):
+
+    def __getitem__(self, key):
+        a1, a2 = DoubleGet.__getitem__(self, key)
+        assert array_equal(a1, a2)
+
+
 # getitem/setitem (1d case)
 def test_bigarray_indexing_1d():
    Z  = BigFile_Zero(PS)
@@ -259,6 +267,45 @@ def test_bigarray_indexing_1d():
    assert raises(ValueError, 'A[:4] = range(5)')


+# indexing where accessed element overlaps edge between pages
+def test_bigarray_indexing_pageedge():
+    shape = (10, PS-1)
+    data  = arange(mul(shape), dtype=uint32).view(uint8)    # NOTE 4 times bigger than uint8
+
+    f  = BigFile_Data_RO(data, PS)
+    fh = f.fileh_open()
+
+    A  = BigArray(shape, uint8, fh)                     # bigarray with test data and shape
+    A_ = data[:mul(shape)].reshape(shape)               # ndarray  ----//----
+
+    # AA[key] -> assert array_equal(A[key], A_[key])
+    AA = DoubleCheck(A, A_)
+
+    AA[0]
+    AA[1]           # tail of page0 - page1
+    AA[1:2]         # ---- // ----
+    AA[1:2:-1]      # []
+    AA[1:0]         # []
+    AA[1:0:-1]      # tail of page0 - page1
+
+
+    shape = (10, PS+1)
+    f  = BigFile_Data_RO(data, PS)
+    fh = f.fileh_open()
+
+    A  = BigArray(shape, uint8, fh)
+    A_ = data[:mul(shape)].reshape(shape)
+
+    AA = DoubleCheck(A, A_)
+
+    AA[0]           # page0 - head of page1
+    AA[0:1]         # ---- // ----
+    AA[0:1:-1]      # []
+    AA[1:0]         # []
+    AA[1:0:-1]      # page1 - head of page2
+
+
+
 # given dimension length n, yield index variants to test
 def indices_to_test(n):
    # ":"