Commit ab9ca2df authored by Kirill Smelkov's avatar Kirill Smelkov

bigarray: Add support for FORTRAN ordering

Until now BigArrays could use only C-style ordering - where major index
is the first one.

Fortran ordering is the opposite - where major index is the last one -
and is used in Fortran world and sometimes by scientists in other areas.

As people keep on asking for Fortran-ordered BigArrays, let's add
support for it. The essential code change is to change 0'th index to
major index in __getitem__ and rest of the code.

For the reference: ndarray memory layout for different orders is
described here:

    http://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html#internal-memory-layout-of-an-ndarray
parent c507d900
...@@ -36,7 +36,7 @@ of physical RAM. ...@@ -36,7 +36,7 @@ of physical RAM.
from __future__ import print_function from __future__ import print_function
from wendelin.lib.calc import mul from wendelin.lib.calc import mul
from numpy import ndarray, dtype, sign, newaxis, asarray from numpy import ndarray, dtype, sign, newaxis, asarray, argmax
import logging import logging
...@@ -53,9 +53,9 @@ class BigArray(object): ...@@ -53,9 +53,9 @@ class BigArray(object):
# ._stridev []int # ._stridev []int
# j-1 # j-1
# strj = prod shapej # XXX *dtype.itemsize # strj = prod shapej # XXX *dtype.itemsize (for C order)
# XXX on start translates to strides #
# .order 'C' or 'F' XXX other orders? # ._order 'C' or 'F'
'_v_fileh', # bigfile memory mapping for this array '_v_fileh', # bigfile memory mapping for this array
) )
...@@ -64,7 +64,6 @@ class BigArray(object): ...@@ -64,7 +64,6 @@ class BigArray(object):
# TODO doc -> see ndarray # TODO doc -> see ndarray
# NOTE does not accept strides # NOTE does not accept strides
# TODO handle order
# NOTE please be cooperative to ZBigArray and name helper data members starting with _v_ # NOTE please be cooperative to ZBigArray and name helper data members starting with _v_
def __init__(self, shape, dtype_, bigfileh, order='C'): def __init__(self, shape, dtype_, bigfileh, order='C'):
self._init0(shape, dtype_, order) self._init0(shape, dtype_, order)
...@@ -75,18 +74,29 @@ class BigArray(object): ...@@ -75,18 +74,29 @@ class BigArray(object):
def _init0(self, shape, dtype_, order): def _init0(self, shape, dtype_, order):
self._dtype = dtype(dtype_) self._dtype = dtype(dtype_)
self._shape = shape self._shape = shape
self._order = order
# TODO +offset ? # TODO +offset ?
# TODO +strides ? # TODO +strides ?
if order != 'C': # order -> stride_in_items(i)
ordering = {
'C': lambda i: mul(shape[i+1:]),
'F': lambda i: mul(shape[:i]),
}
Si = ordering.get(order)
if Si is None:
raise NotImplementedError('Order %s not supported' % order) raise NotImplementedError('Order %s not supported' % order)
# shape, dtype -> ._stridev # shape, dtype -> ._stridev
# TODO take dtype.alignment into account ? # TODO take dtype.alignment into account ?
self._stridev = tuple( mul(shape[i+1:]) * self._dtype.itemsize \ self._stridev = tuple( Si(i) * self._dtype.itemsize \
for i in range(len(shape)) ) for i in range(len(shape)) )
# major axis for this array
@property
def _major_axis(self):
return argmax(self._stridev) # NOTE assumes _stridev >= 0
# ~~~ ndarray-like attributes # ~~~ ndarray-like attributes
...@@ -114,8 +124,8 @@ class BigArray(object): ...@@ -114,8 +124,8 @@ class BigArray(object):
return mul(self._shape) return mul(self._shape)
def __len__(self): def __len__(self):
# lengths of the first axis # lengths of the major axis
return self._shape[0] # FIXME valid only for C-order return self._shape[self._major_axis]
@property @property
def itemsize(self): def itemsize(self):
...@@ -177,7 +187,7 @@ class BigArray(object): ...@@ -177,7 +187,7 @@ class BigArray(object):
# end. # end.
# #
# TODO discard data from backing file on shrinks. # TODO discard data from backing file on shrinks.
self._init0(new_shape, self.dtype, order='C') # FIXME order hardcoded self._init0(new_shape, self.dtype, order=self._order)
# append BigArray in-place # append BigArray in-place
...@@ -195,28 +205,30 @@ class BigArray(object): ...@@ -195,28 +205,30 @@ class BigArray(object):
# #
# BigArray (N,10,5) # BigArray (N,10,5)
# values (3,10,5) # values (3,10,5)
#
# XXX we assume major axis is 0 (C ordering)
def append(self, values): def append(self, values):
values = asarray(values) values = asarray(values)
# make sure appended values, after major axis, are of the same shape # make sure appended values, after major axis, are of the same shape
if self.shape[1:] != values.shape[1:]: M = self._major_axis
if self.shape[:M] != values.shape[:M] or self.shape[M+1:] != values.shape[M+1:]:
# NOTE the same exception as in numpy.append() # NOTE the same exception as in numpy.append()
raise ValueError('all the input array dimensions except for the' raise ValueError('all the input array dimensions except for the'
'concatenation axis must match exactly') 'concatenation axis must match exactly')
# resize us, and prepare to rollback, in case of e.g. dtype # resize us, and prepare to rollback, in case of e.g. dtype
# incompatibility catched on follow-up assignment # incompatibility catched on follow-up assignment
n, delta = self.shape[0], values.shape[0] n, delta = self.shape[M], values.shape[M]
self.resize( (n+delta,) + self.shape[1:] ) self.resize( self.shape[:M] + (n+delta,) + self.shape[M+1:] )
# copy values to prepared tail place, and we are done # copy values to prepared tail place, and we are done
try: try:
self[-delta:] = values # delta_idx = [-delta:] in M, : in all other axis
delta_idx = [slice(None)] * len(self.shape)
delta_idx[M] = slice(-delta, None)
self[tuple(delta_idx)] = values
except: except:
# in case of error - rollback the resize and re-raise # in case of error - rollback the resize and re-raise
self.resize( (n,) + self.shape[1:] ) self.resize( self.shape[:M] + (n,) + self.shape[M+1:] )
raise raise
...@@ -326,71 +338,74 @@ class BigArray(object): ...@@ -326,71 +338,74 @@ class BigArray(object):
# account after we take ndarray view # account after we take ndarray view
# major index / stride # major index / stride
# FIXME assumes C ordering M = self._major_axis
idx0 = idx[0] idxM = idx[M]
stride0 = self._stridev[0] strideM = self._stridev[M]
shape0 = self._shape[0] shapeM = self._shape[M]
# utility: replace M'th element in a sequence tuple/list -> tuple
Mreplace = lambda t, value: tuple(t[:M]) + (value,) + tuple(t[M+1:])
# major idx start/stop/stride # major idx start/stop/stride
try: try:
idx0_start, idx0_stop, idx0_stride = idx0.indices(shape0) idxM_start, idxM_stop, idxM_stride = idxM.indices(shapeM)
except OverflowError as e: except OverflowError as e:
# overflow error here means slice indices do not fit into std long, # overflow error here means slice indices do not fit into std long,
# which also practically means we cannot allocate such amount of # which also practically means we cannot allocate such amount of
# address space. # address space.
raise MemoryError(e) raise MemoryError(e)
#print('idx0:\t', idx0, '-> [%s:%s:%s]' % (idx0_start, idx0_stop, idx0_stride)) #print('idxM:\t', idxM, '-> [%s:%s:%s]' % (idxM_start, idxM_stop, idxM_stride))
#print('strid0:\t', stride0) #, self._stridev #print('stridM:\t', strideM) #, self._stridev
#print('shape0:\t', shape0) #, self._shape #print('shapeM:\t', shapeM) #, self._shape
# nitems in major row # nitems in major row
nitems0 = (idx0_stop - idx0_start - sign(idx0_stride)) // idx0_stride + 1 nitemsM = (idxM_stop - idxM_start - sign(idxM_stride)) // idxM_stride + 1
#print('nitem0:\t', nitems0) #print('nitemM:\t', nitemsM)
# if major row is "empty" slice, we can build view right away without creating vma. # if major row is "empty" slice, we can build view right away without creating vma.
# e.g. 10:5:1, 5:10:-1, 5:5, size+100:size+200 -> [] # e.g. 10:5:1, 5:10:-1, 5:5, size+100:size+200 -> []
if nitems0 <= 0: if nitemsM <= 0:
view = ndarray((0,) + self._shape[1:], self._dtype) view = ndarray(Mreplace(self._shape, 0), self._dtype)
# create appropriate vma and ndarray view to it # create appropriate vma and ndarray view to it
else: else:
# major slice -> in bytes # major slice -> in bytes
byte0_start = idx0_start * stride0 byteM_start = idxM_start * strideM
byte0_stop = idx0_stop * stride0 byteM_stop = idxM_stop * strideM
byte0_stride = idx0_stride * stride0 byteM_stride = idxM_stride * strideM
#print('byte0:\t[%s:%s:%s]' % (byte0_start, byte0_stop, byte0_stride)) #print('byteM:\t[%s:%s:%s]' % (byteM_start, byteM_stop, byteM_stride))
# major slice -> in file pages, always increasing, inclusive # major slice -> in file pages, always increasing, inclusive
if byte0_stride >= 0: if byteM_stride >= 0:
page0_min = byte0_start // pagesize # TODO -> fileh.pagesize pageM_min = byteM_start // pagesize # TODO -> fileh.pagesize
page0_max = (byte0_stop-1) // pagesize # TODO -> fileh.pagesize pageM_max = (byteM_stop-1) // pagesize # TODO -> fileh.pagesize
else: else:
page0_min = (byte0_stop - byte0_stride) // pagesize# TODO -> fileh.pagesize pageM_min = (byteM_stop - byteM_stride) // pagesize# TODO -> fileh.pagesize
page0_max = (byte0_start - byte0_stride - 1) // pagesize# TODO -> fileh.pagesize pageM_max = (byteM_start - byteM_stride - 1) // pagesize# TODO -> fileh.pagesize
#print('page0:\t[%s, %s]' % (page0_min, page0_max)) #print('pageM:\t[%s, %s]' % (pageM_min, pageM_max))
# ~~~ mmap file part corresponding to full major slice into memory # ~~~ mmap file part corresponding to full major slice into memory
vma0 = self._fileh.mmap(page0_min, page0_max-page0_min+1) vmaM = self._fileh.mmap(pageM_min, pageM_max-pageM_min+1)
# first get ndarray view with only major slice specified and rest indices being ":" # first get ndarray view with only major slice specified and rest indices being ":"
view0_shape = (nitems0,) + self._shape[1:] viewM_shape = Mreplace(self._shape, nitemsM)
view0_offset = byte0_start - page0_min * pagesize # TODO -> fileh.pagesize viewM_offset = byteM_start - pageM_min * pagesize # TODO -> fileh.pagesize
view0_stridev = (byte0_stride,) + self._stridev[1:] viewM_stridev = Mreplace(self._stridev, byteM_stride)
#print('view0_shape:\t', view0_shape, self.shape) #print('viewM_shape:\t', viewM_shape, self.shape)
#print('view0_stridv:\t', view0_stridev) #print('viewM_stridv:\t', viewM_stridev)
#print('view0_offset:\t', view0_offset) #print('viewM_offset:\t', viewM_offset)
#print('len(vma0):\t', len(vma0)) #print('len(vmaM):\t', len(vmaM))
view0 = ndarray(view0_shape, self._dtype, vma0, view0_offset, view0_stridev) viewM = ndarray(viewM_shape, self._dtype, vmaM, viewM_offset, viewM_stridev)
# now take into account indices after major one # now take into account indices after major one
view = view0[(slice(None),) + tuple(idx[1:])] view = viewM[Mreplace(idx, slice(None))]
#print('view0:\t', view0.shape) #print('viewM:\t', viewM.shape)
#print('view:\t', view.shape) #print('view:\t', view.shape)
#print('View:\t', view) #print('View:\t', view)
......
...@@ -88,6 +88,25 @@ def test_bigarray_basic(): ...@@ -88,6 +88,25 @@ def test_bigarray_basic():
# TODO .base # TODO .base
B = BigArray((10,3), int32, Zh, order='F')
raises(TypeError, "B.data")
assert B.strides == (4, 40)
assert B.dtype == dtype(int32)
# XXX .flags?
# XXX .flat? (non-basic)
# XXX .imag? (non-basic)
# XXX .real? (non-basic)
assert B.size == 10*3
assert len(B) == 3
assert B.itemsize == 4
assert B.nbytes == 4*10*3
assert B.ndim == 2
assert B.shape == (10,3)
# XXX .ctypes (non-basic)
# TODO .base
# DoubleGet(obj1, obj2)[key] -> obj1[key], obj2[key] # DoubleGet(obj1, obj2)[key] -> obj1[key], obj2[key]
class DoubleGet: class DoubleGet:
...@@ -349,59 +368,60 @@ def test_bigarray_indexing_Nd(): ...@@ -349,59 +368,60 @@ def test_bigarray_indexing_Nd():
f = BigFile_Data_RO(data, PS) f = BigFile_Data_RO(data, PS)
fh = f.fileh_open() fh = f.fileh_open()
A = BigArray(shape, uint32, fh) # bigarray with test data and shape for order in ('C', 'F'):
A_ = data[:mul(shape)].reshape(shape) # ndarray ----//---- A = BigArray(shape, uint32, fh, order=order) # bigarray with test data and shape
A_ = data[:mul(shape)].reshape(shape, order=order) # ndarray ----//----
# AA[key] -> A[key], A_[key] # AA[key] -> A[key], A_[key]
AA = DoubleGet(A, A_) AA = DoubleGet(A, A_)
# now just go over combinations of various slice at each dimension, and see # now just go over combinations of various slice at each dimension, and see
# whether slicing result is the same ndarray would do. # whether slicing result is the same ndarray would do.
for idx in idx_to_test(shape): for idx in idx_to_test(shape):
a, a_ = AA[idx] a, a_ = AA[idx]
assert array_equal(a, a_) assert array_equal(a, a_)
# any part of index out of range # any part of index out of range
# - element access -> raises IndexError # - element access -> raises IndexError
# - slice access -> empty # - slice access -> empty
for idxpos in range(len(shape)): for idxpos in range(len(shape)):
idx = [0]*len(shape) idx = [0]*len(shape)
# idx -> tuple(idx) # idx -> tuple(idx)
# ( list would mean advanced indexing - not what we want ) # ( list would mean advanced indexing - not what we want )
idxt = lambda : tuple(idx) idxt = lambda : tuple(idx)
# valid access element access # valid access element access
idx[idxpos] = shape[idxpos] - 1 # 0, 0, 0, Ni-1, 0 ,0, 0 idx[idxpos] = shape[idxpos] - 1 # 0, 0, 0, Ni-1, 0 ,0, 0
a, a_ = AA[idxt()] a, a_ = AA[idxt()]
assert array_equal(a, a_) assert array_equal(a, a_)
# out-of-range element access # out-of-range element access
idx[idxpos] = shape[idxpos] # 0, 0, 0, Ni , 0 ,0, 0 idx[idxpos] = shape[idxpos] # 0, 0, 0, Ni , 0 ,0, 0
raises(IndexError, 'A [idxt()]') raises(IndexError, 'A [idxt()]')
raises(IndexError, 'A_[idxt()]') raises(IndexError, 'A_[idxt()]')
# out-of-range slice access # out-of-range slice access
idx[idxpos] = slice(shape[idxpos], # 0, 0, 0, Ni:Ni+1 , 0 ,0, 0 idx[idxpos] = slice(shape[idxpos], # 0, 0, 0, Ni:Ni+1 , 0 ,0, 0
shape[idxpos]+1) shape[idxpos]+1)
a, a_ = AA[idxt()] a, a_ = AA[idxt()]
assert array_equal(a, a_) assert array_equal(a, a_)
assert a .size == 0 assert a .size == 0
assert a_.size == 0 assert a_.size == 0
# TODO ... -> expanded (0,1,2,negative), rejected if many # TODO ... -> expanded (0,1,2,negative), rejected if many
# TODO newaxis # TODO newaxis
# TODO nidx < len(shape) # TODO nidx < len(shape)
# TODO empty slice in major row, empty slice in secondary row # TODO empty slice in major row, empty slice in secondary row
""" """
# ellipsis - take some idx[a:b] and replace it by ... # ellipsis - take some idx[a:b] and replace it by ...
for ellipsis in range(2): # 0 - no ellipsis for ellipsis in range(2): # 0 - no ellipsis
# newaxis - added after at some position(s) # newaxis - added after at some position(s)
for newaxis in range(3): # 0 - no newaxis for newaxis in range(3): # 0 - no newaxis
""" """
def test_bigarray_resize(): def test_bigarray_resize():
...@@ -465,42 +485,53 @@ def test_bigarray_resize(): ...@@ -465,42 +485,53 @@ def test_bigarray_resize():
# ~ arange(n*3*2).reshape(n,3,2) # ~ arange(n*3*2).reshape(n,3,2)
def arange32(start, stop, dtype=None): def arange32_c(start, stop, dtype=None):
return arange(start*3*2, stop*3*2, dtype=dtype).reshape((stop-start),3,2) return arange(start*3*2, stop*3*2, dtype=dtype).reshape((stop-start),3,2)
def arange32_f(start, stop, dtype=None):
return arange(start*3*2, stop*3*2, dtype=dtype).reshape(2,3,(stop-start), order='F')
#return arange(start*3*2, stop*3*2, dtype=dtype).reshape(2,3,(stop-start))
def test_bigarray_append(): def test_bigarray_append():
data = zeros(8*PS, dtype=uint32) for order in ('C', 'F'):
f = BigFile_Data(data, PS) data = zeros(8*PS, dtype=uint32)
fh = f.fileh_open() f = BigFile_Data(data, PS)
fh = f.fileh_open()
# first make sure arange32 works correctly
x = numpy.append( arange32(0,4), arange32(4,7), axis=0 ) arange32 = {'C': arange32_c, 'F': arange32_f} [order]
assert array_equal(x, arange32(0,7))
assert array_equal(x.ravel(), arange(7*3*2)) # first make sure arange32 works correctly
x = numpy.append( arange32(0,4), arange32(4,7), axis={'C': 0, 'F': 2} [order] )
# init empty BigArray of shape (x,3,2) #x = numpy.append( arange32(0,4), arange32(4,7), axis=0)
A = BigArray((0,3,2), int64, fh) #x = numpy.append( arange32(0,4), arange32(4,7))
assert array_equal(A[:], arange32(0,0)) assert array_equal(x, arange32(0,7))
assert array_equal(x.ravel(order), arange(7*3*2))
# append initial data
A.append(arange32(0,2)) # init empty BigArray of shape (x,3,2)
assert array_equal(A[:], arange32(0,2)) A = BigArray({'C': (0,3,2), 'F': (2,3,0)} [order], int64, fh, order=order)
A.append(arange32(2,3)) assert array_equal(A[:], arange32(0,0))
assert array_equal(A[:], arange32(0,3))
# append initial data
# append plain list (test for arg conversion) A.append(arange32(0,2))
A.append([[[18,19], [20,21], [22,23]]]) assert array_equal(A[:], arange32(0,2))
assert array_equal(A[:], arange32(0,4)) A.append(arange32(2,3))
assert array_equal(A[:], arange32(0,3))
# append with incorrect shape - rejected, original stays the same
assert raises(ValueError, 'A.append(arange(3))') # append plain list (test for arg conversion)
assert array_equal(A[:], arange32(0,4)) A.append({'C': [[[18,19], [20,21], [22,23]]], \
assert raises(ValueError, 'A.append(arange(3*2).reshape(3,2))') 'F': [[[18],[20],[22]], [[19],[21],[23]]]} [order])
assert array_equal(A[:], arange32(0,4)) assert array_equal(A[:], arange32(0,4))
# append with correct shape, but incompatible dtype - rejected, original stays the same # append with incorrect shape - rejected, original stays the same
assert raises(ValueError, 'A.append(asarray([[[0,1], [2,3], [4,"abc"]]], dtype=object))') assert raises(ValueError, 'A.append(arange(3))')
assert array_equal(A[:], arange32(0,4)) assert array_equal(A[:], arange32(0,4))
assert raises(ValueError, 'A.append(arange(3*2).reshape(3,2))')
assert array_equal(A[:], arange32(0,4))
# append with correct shape, but incompatible dtype - rejected, original stays the same
assert raises(ValueError,
{'C': 'A.append(asarray([[[0,1], [2,3], [4,"abc"]]], dtype=object))',
'F': 'A.append(asarray([[[0],[1],[2]], [[3],[4],["abc"]]], dtype=object))'} [order] )
assert array_equal(A[:], arange32(0,4))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment