Commit d53371b6 authored by Kirill Smelkov's avatar Kirill Smelkov

bigarray: Add ArrayRef utility

ArrayRef is a tool to find out for a NumPy array its top-level root
parent and remember instructions how to recreate original array from
the root. For example if

	root = arange(1E7)
	z = root[1000:2000]
	a = z[10:20]

`ArrayRef(a)` will find out that the root array for `a` is `root` and
that `a` occupies 1010:1020 bytes in it. The vice versa operation is
also possible, for example given

	aref = ArrayRef(a)

it is possible to restore original `a` from `aref`:

	a_ = aref.deref()
	assert array_equal(a_, a)

the restoration works without copying by creating appropriate view of
root.

ArrayRef should work reliably for arrays of arbitrary dimensions,
strides etc - even fancy arrays created via stride tricks such as arrays
whose elements overlap each other should be supported.

This patch adds ArrayRef with support for regular ndarrays only.

The next patch will add ArrayRef support for BigArray and description
for ArrayRef rationale.
parent f785ac07
This diff is collapsed.
# -*- coding: utf-8 -*-
# Wendeling.core.bigarray | Basic tests # Wendeling.core.bigarray | Basic tests
# Copyright (C) 2014-2015 Nexedi SA and Contributors. # Copyright (C) 2014-2018 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -18,12 +19,13 @@ ...@@ -18,12 +19,13 @@
# See COPYING file for full licensing terms. # See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options. # See https://www.nexedi.com/licensing for rationale and options.
from wendelin.bigarray import BigArray from wendelin.bigarray import BigArray, ArrayRef, _flatbytev
from wendelin.bigfile import BigFile from wendelin.bigfile import BigFile
from wendelin.lib.mem import memcpy from wendelin.lib.mem import memcpy
from wendelin.lib.calc import mul from wendelin.lib.calc import mul
from numpy import ndarray, dtype, int64, int32, uint32, uint8, all, zeros, arange, \ from numpy import ndarray, dtype, int64, int32, uint32, int16, uint8, all, zeros, arange, \
array_equal, asarray array_equal, asarray, newaxis, swapaxes
from numpy.lib.stride_tricks import as_strided
import numpy import numpy
from pytest import raises from pytest import raises
...@@ -588,3 +590,227 @@ def test_bigarray_to_ndarray(): ...@@ -588,3 +590,227 @@ def test_bigarray_to_ndarray():
for i in range(48,65): for i in range(48,65):
C = BigArray(((1<<i)-1,), uint8, Zh) C = BigArray(((1<<i)-1,), uint8, Zh)
raises(MemoryError, 'asarray(C)') raises(MemoryError, 'asarray(C)')
def test_arrayref():
# test data - all items are unique - so we can check array by content
data = zeros(PS, dtype=uint8)
data32 = data.view(uint32)
data32[:] = arange(len(data32), dtype=uint32)
data[:256] = arange(256, dtype=uint8) # first starting bytes are all unique
# regular ndarray without parent at all
ref = ArrayRef(data)
assert ref.root is data
assert ref.lo == 0
assert ref.hi == len(data)
assert ref.z0 == 0
assert ref.shape == data.shape
assert ref.stridev == data.strides
assert ref.dtype == data.dtype
assert array_equal(ref.deref(), data)
# regular ndarrays with parent
ref = ArrayRef(data32)
assert ref.root is data
assert ref.lo == 0
assert ref.hi == len(data)
assert ref.z0 == 0
assert ref.shape == data32.shape
assert ref.stridev == data32.strides
assert ref.dtype == data32.dtype
assert array_equal(ref.deref(), data32)
a = data[100:140]
ref = ArrayRef(a)
assert ref.root is data
assert ref.lo == 100
assert ref.hi == 140
assert ref.z0 == 0
assert ref.shape == (40,)
assert ref.stridev == (1,)
assert ref.dtype == data.dtype
assert array_equal(ref.deref(), a)
a = data[140:100:-1]
ref = ArrayRef(a)
assert ref.root is data
assert ref.lo == 101
assert ref.hi == 141
assert ref.z0 == 39
assert ref.shape == (40,)
assert ref.stridev == (-1,)
assert ref.dtype == data.dtype
assert array_equal(ref.deref(), a)
a = data[100:140:-1] # empty
ref = ArrayRef(a)
assert ref.root is data
assert ref.lo == 0
assert ref.hi == 1
assert ref.z0 == 0
assert ref.shape == (0,)
assert ref.stridev == (1,)
assert ref.dtype == data.dtype
assert array_equal(ref.deref(), a)
# rdata is the same as data[::-1] but without base - i.e. it is toplevel
m = memoryview(data[::-1])
rdata = asarray(m)
assert array_equal(rdata[::-1], data)
assert rdata.strides == (-1,)
m_ = rdata.base
assert isinstance(m_, memoryview)
#assert m_ is m XXX strangely it is another object, not exactly m
# XXX however rdata.strides<0 and no rdata.base.base is enough for us here.
raises(AttributeError, 'm_.base')
a = rdata[100:140]
ref = ArrayRef(a)
assert ref.root is rdata
assert ref.lo == PS - 140
assert ref.hi == PS - 100
assert ref.z0 == 39
assert ref.shape == (40,)
assert ref.stridev == (-1,)
assert ref.dtype == data.dtype
assert array_equal(ref.deref(), a)
for root in (data, rdata):
# refok verifies whether ArrayRef(x) works ok
def refok(x):
ref = ArrayRef(x)
assert ref.root is root
x_ = ref.deref()
assert array_equal(x_, x)
assert x_.dtype == x.dtype
assert type(x_) == type(x)
# check that deref won't access range outside lo:hi - by copying
# root, setting bytes in adjusted root outside lo:hi to either 0x00
# or 0xff and tweaking ref.root = root_.
root_ = numpy.copy(_flatbytev(root[:]))
root_[:ref.lo] = 0
root_[ref.hi:] = 0
ref.root = root_
assert array_equal(ref.deref(), x)
root_[:ref.lo] = 0xff
root_[ref.hi:] = 0xff
assert array_equal(ref.deref(), x)
if root is rdata:
a = root[::-1] # rdata
else:
a = root[:] # data
assert array_equal(a, data)
# subslices that is possible to get by just indexing
refok( a[:] )
refok( a[1:2] )
refok( a[1:10] )
refok( a[1:10:2] )
refok( a[1:10:3] )
refok( a[1:10:-1] ) # empty (.size = 0)
refok( a[10:1:-1] )
refok( a[10:1:-2] )
refok( a[10:1:-3] )
# long chain root -> a -> a[...] -> a[...] -> leaf
l = a[2:118]
l = l.view(uint32)[3:20]
l = l[1:9]
refok(l)
# not aligned - it is not possible to get to resulting slice just by indexing A
refok( a.view(uint8)[2:-2].view(uint32) )
refok( a.view(uint8)[2:-2].view(uint32)[::-1] )
refok( a.view(int64) ) # change of type ↑ in size
refok( a.view(int64)[::-1] )
refok( a.view(int16) ) # change of type ↓ in size
refok( a.view(int16)[::-1] )
# change of type to size not multiple of original
refok( a[1:1+5*10].view('V5') ) # 4 -> 5
refok( a[1:1+5*10].view('V5')[::-1] )
refok( a[1:1+3*10].view('V3') ) # 4 -> 3
refok( a[1:1+3*10].view('V3')[::-1] )
# intermediate parent with <0 stride
r = a[1:1+3*10].view('V3')[::-1]
refok( r[-2:2:-1] )
# 2d array
x = a.view(uint32).reshape((8, -1))
y = swapaxes(x, 0,1)
assert x.shape == (8, PS//(4*8))
assert x.strides == (PS//8, 4)
assert y.shape == (PS//(4*8), 8)
assert y.strides == (4, PS//8)
refok( x )
refok( y )
# array with both >0 and <0 strides
x_ = x[:,::-1]
y_ = y[:,::-1]
assert x_.shape == x.shape
assert x_.strides == (PS//8, -4)
assert y_.shape == y.shape
assert y_.strides == (4, -PS//8)
refok( x_ )
refok( y_ )
# array with [1] dimension
z1 = x[:, newaxis, :]
assert z1.shape == (8, 1, PS//(4*8))
assert z1.strides == (PS//8, 0, 4)
refok(z1)
# array with [0] dimension
z0 = z1[:, 0:0, :]
assert z0.shape == (8, 0, PS//(4*8))
assert z0.strides == (PS//8, 0, 4)
refok(z0)
# tricky array overlapping itself
t = a.view(uint32)
assert t.shape == (PS//4,)
assert t.strides == (4,)
assert t.itemsize == 4
t = as_strided(t, strides=(1,))
assert t.shape == (PS//4,)
assert t.strides == (1,)
assert t.itemsize == 4
refok(t)
# structured dtype
s = a.view(dtype=[('width', '<i2'), ('length', '<i2')])
assert s.shape == (PS//4,)
assert s.strides == (4,)
assert s.itemsize == 4
refok(s)
s_ = s['length']
assert s_.shape == (PS//4,)
assert s_.strides == (4,)
assert s_.itemsize == 2
refok(s_)
# ndarray subclass, e.g. np.recarray
r = s.view(type=numpy.recarray)
assert isinstance(r, numpy.recarray)
assert r.shape == (PS//4,)
assert r.strides == (4,)
assert r.itemsize == 4
assert array_equal(r.length, s['length'])
refok(r)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment