Commit 15d052fc authored by Dag Sverre Seljebotn

More buffer typestring; fixes #285

parent ba35eb82
...@@ -1002,6 +1002,14 @@ static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) { ...@@ -1002,6 +1002,14 @@ static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) {
return 0; return 0;
} }
/*
 * Guard called before a packing-mode character is applied to the context.
 *
 * Succeeds (returns 0) only while no type has been recorded yet
 * (ctx->enc_type == 0) and the packing mode is still '@'. Otherwise a
 * ValueError is set and -1 is returned, so that a packing-mode character
 * appearing later in the format string is rejected rather than applied.
 */
static int __Pyx_BufFmt_FirstPack(__Pyx_BufFmt_Context* ctx) {
  if (ctx->enc_type == 0 && ctx->packmode == '@') {
    return 0;
  }
  PyErr_SetString(PyExc_ValueError, "Buffer packing mode currently only allowed at beginning of format string (this is a defect)");
  return -1;
}
static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) { static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) {
int got_Z = 0; int got_Z = 0;
while (1) { while (1) {
...@@ -1027,6 +1035,7 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha ...@@ -1027,6 +1035,7 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha
PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler"); PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler");
return NULL; return NULL;
} }
if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL;
ctx->packmode = '='; ctx->packmode = '=';
++ts; ++ts;
break; break;
...@@ -1036,13 +1045,15 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha ...@@ -1036,13 +1045,15 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha
PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler"); PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler");
return NULL; return NULL;
} }
if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL;
ctx->packmode = '='; ctx->packmode = '=';
++ts; ++ts;
break; break;
case '=': case '=':
case '@': case '@':
case '^': case '^':
ctx->packmode = *ts++; if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL;
ctx->packmode = *ts++;
break; break;
case 'T': /* substruct */ case 'T': /* substruct */
{ {
......
...@@ -16,7 +16,7 @@ cimport stdlib ...@@ -16,7 +16,7 @@ cimport stdlib
cdef extern from "Python.h": cdef extern from "Python.h":
ctypedef int Py_intptr_t ctypedef int Py_intptr_t
cdef extern from "numpy/arrayobject.h": cdef extern from "numpy/arrayobject.h":
ctypedef Py_intptr_t npy_intp ctypedef Py_intptr_t npy_intp
...@@ -68,6 +68,9 @@ cdef extern from "numpy/arrayobject.h": ...@@ -68,6 +68,9 @@ cdef extern from "numpy/arrayobject.h":
# In particular strided access is always provided regardless # In particular strided access is always provided regardless
# of flags # of flags
cdef int copy_shape, i, ndim cdef int copy_shape, i, ndim
cdef int endian_detector = 1
cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
ndim = PyArray_NDIM(self) ndim = PyArray_NDIM(self)
if sizeof(npy_intp) != sizeof(Py_ssize_t): if sizeof(npy_intp) != sizeof(Py_ssize_t):
...@@ -105,7 +108,6 @@ cdef extern from "numpy/arrayobject.h": ...@@ -105,7 +108,6 @@ cdef extern from "numpy/arrayobject.h":
cdef dtype descr = self.descr cdef dtype descr = self.descr
cdef list stack cdef list stack
cdef int offset cdef int offset
cdef char byteorder = 0
cdef bint hasfields = PyDataType_HASFIELDS(descr) cdef bint hasfields = PyDataType_HASFIELDS(descr)
...@@ -118,6 +120,9 @@ cdef extern from "numpy/arrayobject.h": ...@@ -118,6 +120,9 @@ cdef extern from "numpy/arrayobject.h":
if not hasfields: if not hasfields:
t = descr.type_num t = descr.type_num
if ((descr.byteorder == '>' and little_endian) or
(descr.byteorder == '<' and not little_endian)):
raise ValueError("Non-native byte order not supported")
if t == NPY_BYTE: f = "b" if t == NPY_BYTE: f = "b"
elif t == NPY_UBYTE: f = "B" elif t == NPY_UBYTE: f = "B"
elif t == NPY_SHORT: f = "h" elif t == NPY_SHORT: f = "h"
...@@ -141,10 +146,11 @@ cdef extern from "numpy/arrayobject.h": ...@@ -141,10 +146,11 @@ cdef extern from "numpy/arrayobject.h":
return return
else: else:
info.format = <char*>stdlib.malloc(_buffer_format_string_len) info.format = <char*>stdlib.malloc(_buffer_format_string_len)
info.format[0] = '^' # Native data types, manual alignment
offset = 0 offset = 0
f = _util_dtypestring(descr, info.format, f = _util_dtypestring(descr, info.format + 1,
info.format + _buffer_format_string_len, info.format + _buffer_format_string_len,
&offset, &byteorder) &offset)
f[0] = 0 # Terminate format string f[0] = 0 # Terminate format string
def __releasebuffer__(ndarray self, Py_buffer* info): def __releasebuffer__(ndarray self, Py_buffer* info):
...@@ -257,39 +263,45 @@ ctypedef npy_cdouble cdouble_t ...@@ -257,39 +263,45 @@ ctypedef npy_cdouble cdouble_t
ctypedef npy_clongdouble clongdouble_t ctypedef npy_clongdouble clongdouble_t
cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset, char* byteorder) except NULL: cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL:
# Recursive utility function used in __getbuffer__ to get format # Recursive utility function used in __getbuffer__ to get format
# string. The new location in the format string is returned. # string. The new location in the format string is returned.
cdef dtype child cdef dtype child
cdef int delta_offset cdef int delta_offset
cdef tuple i cdef tuple i
cdef char new_byteorder cdef int endian_detector = 1
# The lowest-addressed byte of the integer 1 is nonzero exactly when the
# host is little-endian. (The comparison must be "!= 0"; testing "== 0"
# inverts the flag and lets non-native fields slip past the byte-order
# check below.)
cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
for i in descr.fields.itervalues(): for i in descr.fields.itervalues():
child = i[0] child = i[0]
new_offset = i[1] new_offset = i[1]
if (end - f) - (new_offset - offset[0]) < 15: # this should leave room for "T{" and "}" as well if (end - f) - (new_offset - offset[0]) < 15:
raise RuntimeError("Format string allocated too short, see comment in numpy.pxd") raise RuntimeError("Format string allocated too short, see comment in numpy.pxd")
# new_byteorder = child.byteorder if ((child.byteorder == '>' and little_endian) or
# if new_byteorder == '|': new_byteorder = '=' (child.byteorder == '<' and not little_endian)):
# if byteorder[0] != new_byteorder: raise ValueError("Non-native byte order not supported")
# f[0] = new_byteorder # One could encode it in the format string and have Cython
# f += 1 # complain instead, BUT: < and > in format strings also imply
# byteorder[0] = new_byteorder # standardized sizes for datatypes, and we rely on native in
# order to avoid reencoding data types based on their size.
#
# A proper PEP 3118 exporter for other clients than Cython
# must deal properly with this!
# Output padding bytes # Output padding bytes
# while offset[0] < new_offset: while offset[0] < new_offset:
# f[0] = 120 # "x"; pad byte f[0] = 120 # "x"; pad byte
# f += 1 f += 1
# offset[0] += 1 offset[0] += 1
offset[0] += child.itemsize offset[0] += child.itemsize
if not PyDataType_HASFIELDS(child): if not PyDataType_HASFIELDS(child):
t = child.type_num t = child.type_num
if end - f < 15: # this should leave room for "T{" and "}" as well if end - f < 5:
raise RuntimeError("Format string allocated too short.") raise RuntimeError("Format string allocated too short.")
# Until ticket #99 is fixed, use integers to avoid warnings # Until ticket #99 is fixed, use integers to avoid warnings
...@@ -314,11 +326,8 @@ cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset ...@@ -314,11 +326,8 @@ cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset
raise ValueError("unknown dtype code in numpy.pxd (%d)" % t) raise ValueError("unknown dtype code in numpy.pxd (%d)" % t)
f += 1 f += 1
else: else:
f[0] = 84 #"T" # Cython ignores struct boundary information ("T{...}"),
f[1] = 123 #"{" # so don't output it
f += 2 f = _util_dtypestring(child, f, end, offset)
f = _util_dtypestring(child, f, end, offset, byteorder)
f[0] = 125 #"}"
f += 1
return f return f
...@@ -26,10 +26,10 @@ cdef struct align_of_int_helper: ...@@ -26,10 +26,10 @@ cdef struct align_of_int_helper:
int i int i
double_align = sizeof(align_of_double_helper) - sizeof(double) double_align = sizeof(align_of_double_helper) - sizeof(double)
int_align = sizeof(align_of_int_helper) - sizeof(int) int_align = sizeof(align_of_int_helper) - sizeof(int)
if double_align != 8: if double_align != 8 or sizeof(double) != 8:
raise RuntimeError("Alignment of double is %d on this system, please report to cython-dev for a testcase fix" % double_align) raise RuntimeError("Alignment or size of double is %d on this system, please report to cython-dev for a testcase fix" % double_align)
if int_align != 4: if int_align != 4 or sizeof(int) != 4:
raise RuntimeError("Alignment of int is %d on this system, please report to cython-dev for a testcase fix" % int_align) raise RuntimeError("Alignment or size of int is %d on this system, please report to cython-dev for a testcase fix" % int_align)
cdef class MockBuffer: cdef class MockBuffer:
...@@ -152,7 +152,8 @@ def char3int(fmt): ...@@ -152,7 +152,8 @@ def char3int(fmt):
>>> char3int("c1i1i1i") >>> char3int("c1i1i1i")
>>> char3int("c3i") >>> char3int("c3i")
>>> char3int("ci2i") >>> char3int("ci2i")
>>> char3int("c@i@2i")
#TODO > char3int("c@i@2i")
Extra pad bytes (assuming int size is 4 or more) Extra pad bytes (assuming int size is 4 or more)
>>> char3int("cxiii") >>> char3int("cxiii")
...@@ -161,11 +162,12 @@ def char3int(fmt): ...@@ -161,11 +162,12 @@ def char3int(fmt):
Standard alignment (assming int size is 4) Standard alignment (assming int size is 4)
>>> char3int("=c3xiii") >>> char3int("=c3xiii")
>>> char3int("=cxxx@iii")
>>> char3int("=ciii") >>> char3int("=ciii")
Traceback (most recent call last): Traceback (most recent call last):
... ...
ValueError: Buffer dtype mismatch; next field is at offset 1 but 4 expected ValueError: Buffer dtype mismatch; next field is at offset 1 but 4 expected
#TODO char3int("=cxxx@iii")
Error: Error:
>>> char3int("cii") >>> char3int("cii")
...@@ -222,7 +224,6 @@ def complex_test(fmt): ...@@ -222,7 +224,6 @@ def complex_test(fmt):
def alignment_string(fmt, exc=None): def alignment_string(fmt, exc=None):
""" """
>>> alignment_string("@i") >>> alignment_string("@i")
>>> alignment_string("@i@@")
>>> alignment_string("%si" % current_endian) >>> alignment_string("%si" % current_endian)
>>> alignment_string("%si" % other_endian, "X-endian buffer not supported on X-endian compiler") >>> alignment_string("%si" % other_endian, "X-endian buffer not supported on X-endian compiler")
>>> alignment_string("=i") >>> alignment_string("=i")
...@@ -268,6 +269,39 @@ def mixed_complex_struct(): ...@@ -268,6 +269,39 @@ def mixed_complex_struct():
""" """
cdef object[MixedComplex] buf = MockBuffer("Zd", sizeof(MixedComplex)) cdef object[MixedComplex] buf = MockBuffer("Zd", sizeof(MixedComplex))
# Inner record nested inside PackedStruct below. Declared `packed`, so
# presumably `y` directly follows `x` with no alignment padding.
cdef packed struct PackedSubStruct:
char x
int y
# Buffer dtype used by packed_struct(): a char, an int, then a nested
# PackedSubStruct (char, int), all declared `packed`.
cdef packed struct PackedStruct:
char a
int b
PackedSubStruct sub
# Doctest driver: wraps a MockBuffer described by `fmt` in a
# PackedStruct-typed buffer variable. The doctests list format strings that
# are accepted and ones that raise ValueError.
@testcase
def packed_struct(fmt):
"""
Assuming int is four bytes:
>>> packed_struct("^cici")
>>> packed_struct("=cibi")
>>> packed_struct("^c@i^ci")
Traceback (most recent call last):
...
ValueError: Buffer packing mode currently only allowed at beginning of format string (this is a defect)
However aligned access won't work:
>>> packed_struct("@cici")
Traceback (most recent call last):
...
ValueError: Buffer dtype mismatch; next field is at offset 4 but 1 expected
"""
# The buffer is sized to exactly one PackedStruct; presumably the typed
# assignment is what validates `fmt` against the struct layout.
cdef object[PackedStruct] buf = MockBuffer(fmt, sizeof(PackedStruct))
# TODO: empty struct # TODO: empty struct
# TODO: Incomplete structs # TODO: Incomplete structs
# TODO: mixed structs
...@@ -2,6 +2,17 @@ ...@@ -2,6 +2,17 @@
cimport numpy as np cimport numpy as np
def little_endian():
    # Store the integer 1 and inspect its lowest-addressed byte: that byte
    # is nonzero only when the least significant byte is stored first.
    cdef int probe = 1
    cdef char* lowest_byte = <char*>&probe
    return lowest_byte[0] != 0
# Byte-order prefix characters for dtype strings: `my_endian` matches the
# host as reported by little_endian(), `other_endian` is the opposite one.
my_endian, other_endian = ('<', '>') if little_endian() else ('>', '<')
try: try:
import numpy as np import numpy as np
__doc__ = u""" __doc__ = u"""
...@@ -130,23 +141,49 @@ try: ...@@ -130,23 +141,49 @@ try:
>>> test_dtype(np.int32, inc1_int32_t) >>> test_dtype(np.int32, inc1_int32_t)
>>> test_dtype(np.float64, inc1_float64_t) >>> test_dtype(np.float64, inc1_float64_t)
Endian tests:
>>> test_dtype('%si' % my_endian, inc1_int)
>>> test_dtype('%si' % other_endian, inc1_int)
Traceback (most recent call last):
...
ValueError: Non-native byte order not supported
>>> test_recordarray() >>> test_recordarray()
>>> test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\ >>> print(test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\
('a', np.dtype('i,i')),\ ('a', np.dtype('i,i')),\
('b', np.dtype('i,i'))\ ('b', np.dtype('i,i'))\
]))) ]))))
array([((0, 0), (0, 0)), ((1, 2), (1, 4)), ((1, 2), (1, 4))], array([((0, 0), (0, 0)), ((1, 2), (1, 4)), ((1, 2), (1, 4))],
dtype=[('a', [('f0', '<i4'), ('f1', '<i4')]), ('b', [('f0', '<i4'), ('f1', '<i4')])]) dtype=[('a', [('f0', '!i4'), ('f1', '!i4')]), ('b', [('f0', '!i4'), ('f1', '!i4')])])
>>> test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\ >>> print(test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\
('a', np.dtype('i,f')),\ ('a', np.dtype('i,f')),\
('b', np.dtype('i,i'))\ ('b', np.dtype('i,i'))\
]))) ]))))
Traceback (most recent call last): Traceback (most recent call last):
... ...
ValueError: Buffer dtype mismatch, expected 'int' but got 'float' in 'DoubleInt.y' ValueError: Buffer dtype mismatch, expected 'int' but got 'float' in 'DoubleInt.y'
>>> print(test_packed_align(np.zeros((1,), dtype=np.dtype('b,i', align=False))))
array([(22, 23)],
dtype=[('f0', '|i1'), ('f1', '!i4')])
>>> print(test_unpacked_align(np.zeros((1,), dtype=np.dtype('b,i', align=True))))
array([(22, 23)],
dtype=[('f0', '|i1'), ('', '|V3'), ('f1', '!i4')])
>>> print(test_packed_align(np.zeros((1,), dtype=np.dtype('b,i', align=True))))
Traceback (most recent call last):
...
ValueError: Buffer dtype mismatch; next field is at offset 4 but 1 expected
>>> print(test_unpacked_align(np.zeros((1,), dtype=np.dtype('b,i', align=False))))
Traceback (most recent call last):
...
ValueError: Buffer dtype mismatch; next field is at offset 1 but 4 expected
>>> test_good_cast() >>> test_good_cast()
True True
...@@ -300,7 +337,7 @@ def test_nested_dtypes(obj): ...@@ -300,7 +337,7 @@ def test_nested_dtypes(obj):
arr[1].b.x = arr[0].a.y + 1 arr[1].b.x = arr[0].a.y + 1
arr[1].b.y = 4 arr[1].b.y = 4
arr[2] = arr[1] arr[2] = arr[1]
return arr return repr(arr).replace('<', '!').replace('>', '!')
def test_bad_nested_dtypes(): def test_bad_nested_dtypes():
cdef object[BadNestedStruct] arr cdef object[BadNestedStruct] arr
...@@ -314,3 +351,21 @@ def test_good_cast(): ...@@ -314,3 +351,21 @@ def test_good_cast():
def test_bad_cast(): def test_bad_cast():
# This should raise an exception # This should raise an exception
cdef np.ndarray[long, cast=True] arr = np.array([1], dtype=b'b') cdef np.ndarray[long, cast=True] arr = np.array([1], dtype=b'b')
# Element type for test_packed_align(): a char followed by an int, declared
# `packed` (presumably no padding between the two members).
cdef packed struct PackedStruct:
char a
int b
# Element type for test_unpacked_align(): same members as PackedStruct but
# without `packed`, so the compiler's default alignment applies.
cdef struct UnpackedStruct:
char a
int b
def test_packed_align(np.ndarray[PackedStruct] arr):
    # Write known values through the PackedStruct-typed buffer, then return
    # the array's repr with both byte-order markers replaced by '!'.
    arr[0].a = 22
    arr[0].b = 23
    rendered = repr(arr)
    return rendered.replace('<', '!').replace('>', '!')
def test_unpacked_align(np.ndarray[UnpackedStruct] arr):
    # Write known values through the UnpackedStruct-typed buffer, then
    # return the array's repr with both byte-order markers replaced by '!'.
    arr[0].a = 22
    arr[0].b = 23
    rendered = repr(arr)
    return rendered.replace('<', '!').replace('>', '!')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment