More buffer typestring; fixes #285

15d052fc · Dag Sverre Seljebotn · ba35eb82 · 15d052fc · 15d052fc · 15d052fc
Commit 15d052fc authored May 06, 2009 by Dag Sverre Seljebotn
4 changed files
--- a/Cython/Compiler/Buffer.py
+++ b/Cython/Compiler/Buffer.py
@@ -1002,6 +1002,14 @@ static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) {
  return 0;    
 }

+static int __Pyx_BufFmt_FirstPack(__Pyx_BufFmt_Context* ctx) {
+  /* Returns 0 while ctx->enc_type is still 0 and ctx->packmode is still '@';
+     otherwise sets a ValueError and returns -1. */
+  if (ctx->enc_type == 0 && ctx->packmode == '@') return 0;
+  PyErr_SetString(PyExc_ValueError, "Buffer packing mode currently only allowed at beginning of format string (this is a defect)");
+  return -1;
+}
+
 static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) {
  int got_Z = 0;
  while (1) {
@@ -1027,6 +1035,7 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha
          PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler");
          return NULL;
        }
+        if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL;
        ctx->packmode = '=';
        ++ts;
        break;
@@ -1036,13 +1045,15 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha
          PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler");
          return NULL;
        }
+        if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL;
        ctx->packmode = '=';
        ++ts;
        break;
      case '=':
      case '@':
      case '^':
-      ctx->packmode = *ts++;
+        if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL;
+        ctx->packmode = *ts++;
        break;
      case 'T': /* substruct */
        {

--- a/Cython/Includes/numpy.pxd
+++ b/Cython/Includes/numpy.pxd
@@ -16,7 +16,7 @@ cimport stdlib

 cdef extern from "Python.h":
    ctypedef int Py_intptr_t
-    
+
 cdef extern from "numpy/arrayobject.h":
    ctypedef Py_intptr_t npy_intp
        
@@ -68,6 +68,9 @@ cdef extern from "numpy/arrayobject.h":
            # In particular strided access is always provided regardless
            # of flags
            cdef int copy_shape, i, ndim
+            cdef int endian_detector = 1
+            cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
+            
            ndim = PyArray_NDIM(self)
            
            if sizeof(npy_intp) != sizeof(Py_ssize_t):
@@ -105,7 +108,6 @@ cdef extern from "numpy/arrayobject.h":
            cdef dtype descr = self.descr
            cdef list stack
            cdef int offset
-            cdef char byteorder = 0

            cdef bint hasfields = PyDataType_HASFIELDS(descr)

@@ -118,6 +120,9 @@ cdef extern from "numpy/arrayobject.h":

            if not hasfields:
                t = descr.type_num
+                if ((descr.byteorder == '>' and little_endian) or
+                    (descr.byteorder == '<' and not little_endian)):
+                    raise ValueError("Non-native byte order not supported")
                if   t == NPY_BYTE:        f = "b"
                elif t == NPY_UBYTE:       f = "B"
                elif t == NPY_SHORT:       f = "h"
@@ -141,10 +146,11 @@ cdef extern from "numpy/arrayobject.h":
                return
            else:
                info.format = <char*>stdlib.malloc(_buffer_format_string_len)
+                info.format[0] = '^' # Native data types, manual alignment
                offset = 0
-                f = _util_dtypestring(descr, info.format,
+                f = _util_dtypestring(descr, info.format + 1,
                                      info.format + _buffer_format_string_len,
-                                      &offset, &byteorder)
+                                      &offset)
                f[0] = 0 # Terminate format string

        def __releasebuffer__(ndarray self, Py_buffer* info):
@@ -257,39 +263,45 @@ ctypedef npy_cdouble     cdouble_t
 ctypedef npy_clongdouble clongdouble_t


-cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset, char* byteorder) except NULL:
+cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL:
    # Recursive utility function used in __getbuffer__ to get format
    # string. The new location in the format string is returned.

    cdef dtype child
    cdef int delta_offset
    cdef tuple i
-    cdef char new_byteorder
+    cdef int endian_detector = 1
+    cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)  # first byte of int 1 is nonzero only on little-endian hosts
+    
    for i in descr.fields.itervalues():
        child = i[0]
        new_offset = i[1]

-        if (end - f) - (new_offset - offset[0]) < 15: # this should leave room for "T{" and "}" as well
+        if (end - f) - (new_offset - offset[0]) < 15:
            raise RuntimeError("Format string allocated too short, see comment in numpy.pxd")

-#        new_byteorder = child.byteorder
-#        if new_byteorder == '|': new_byteorder = '='
-#        if byteorder[0] != new_byteorder:
-#            f[0] = new_byteorder
-#            f += 1
-#            byteorder[0] = new_byteorder
-
+        if ((child.byteorder == '>' and little_endian) or
+            (child.byteorder == '<' and not little_endian)):
+            raise ValueError("Non-native byte order not supported")
+            # One could encode it in the format string and have Cython
+            # complain instead, BUT: < and > in format strings also imply
+            # standardized sizes for datatypes, and we rely on native in
+            # order to avoid reencoding data types based on their size.
+            #
+            # A proper PEP 3118 exporter for other clients than Cython
+            # must deal properly with this!
+        
        # Output padding bytes
-#        while offset[0] < new_offset:
-#            f[0] = 120 # "x"; pad byte
-#            f += 1
-#            offset[0] += 1
+        while offset[0] < new_offset:
+            f[0] = 120 # "x"; pad byte
+            f += 1
+            offset[0] += 1

        offset[0] += child.itemsize
            
        if not PyDataType_HASFIELDS(child):
            t = child.type_num
-            if end - f < 15: # this should leave room for "T{" and "}" as well
+            if end - f < 5:
                raise RuntimeError("Format string allocated too short.")

            # Until ticket #99 is fixed, use integers to avoid warnings
@@ -314,11 +326,8 @@ cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset
                raise ValueError("unknown dtype code in numpy.pxd (%d)" % t)
            f += 1
        else:
-            f[0] = 84 #"T"
-            f[1] = 123 #"{"
-            f += 2
-            f = _util_dtypestring(child, f, end, offset, byteorder)
-            f[0] = 125 #"}"
-            f += 1
+            # Cython ignores struct boundary information ("T{...}"),
+            # so don't output it
+            f = _util_dtypestring(child, f, end, offset)
    return f
                
--- a/tests/run/buffmt.pyx
+++ b/tests/run/buffmt.pyx
@@ -26,10 +26,10 @@ cdef struct align_of_int_helper:
    int i
 double_align = sizeof(align_of_double_helper) - sizeof(double)
 int_align = sizeof(align_of_int_helper) - sizeof(int)
-if double_align != 8:
-    raise RuntimeError("Alignment of double is %d on this system, please report to cython-dev for a testcase fix" % double_align)
-if int_align != 4:
-    raise RuntimeError("Alignment of int is %d on this system, please report to cython-dev for a testcase fix" % int_align)
+if double_align != 8 or sizeof(double) != 8:
+    raise RuntimeError("Alignment or size of double is %d on this system, please report to cython-dev for a testcase fix" % double_align)
+if int_align != 4 or sizeof(int) != 4:
+    raise RuntimeError("Alignment or size of int is %d on this system, please report to cython-dev for a testcase fix" % int_align)

 
 cdef class MockBuffer:
@@ -152,7 +152,8 @@ def char3int(fmt):
    >>> char3int("c1i1i1i")    
    >>> char3int("c3i")
    >>> char3int("ci2i")
-    >>> char3int("c@i@2i")
+
+    #TODO > char3int("c@i@2i")

    Extra pad bytes (assuming int size is 4 or more)
    >>> char3int("cxiii")
@@ -161,11 +162,12 @@ def char3int(fmt):

    Standard alignment (assming int size is 4)
    >>> char3int("=c3xiii")
-    >>> char3int("=cxxx@iii")
    >>> char3int("=ciii")
    Traceback (most recent call last):
        ...
    ValueError: Buffer dtype mismatch; next field is at offset 1 but 4 expected
+
+    #TODO char3int("=cxxx@iii")
    
    Error:
    >>> char3int("cii")
@@ -222,7 +224,6 @@ def complex_test(fmt):
 def alignment_string(fmt, exc=None):
    """
    >>> alignment_string("@i")
-    >>> alignment_string("@i@@")
    >>> alignment_string("%si" % current_endian)
    >>> alignment_string("%si" % other_endian, "X-endian buffer not supported on X-endian compiler")
    >>> alignment_string("=i")
@@ -268,6 +269,39 @@ def mixed_complex_struct():
    """
    cdef object[MixedComplex] buf = MockBuffer("Zd", sizeof(MixedComplex))

- 
+
+cdef packed struct PackedSubStruct:
+    char x
+    int y
+
+cdef packed struct PackedStruct:
+    char a
+    int b
+    PackedSubStruct sub
+    
+
+@testcase
+def packed_struct(fmt):
+    """
+    Assuming int is four bytes:
+    
+    >>> packed_struct("^cici")
+    >>> packed_struct("=cibi")
+
+    >>> packed_struct("^c@i^ci")
+    Traceback (most recent call last):
+        ...
+    ValueError: Buffer packing mode currently only allowed at beginning of format string (this is a defect)
+    
+    However aligned access won't work:
+    >>> packed_struct("@cici")
+    Traceback (most recent call last):
+        ...
+    ValueError: Buffer dtype mismatch; next field is at offset 4 but 1 expected
+
+    """
+    cdef object[PackedStruct] buf = MockBuffer(fmt, sizeof(PackedStruct))
+
 # TODO: empty struct
 # TODO: Incomplete structs
+# TODO: mixed structs
--- a/tests/run/numpy_test.pyx
+++ b/tests/run/numpy_test.pyx
@@ -2,6 +2,17 @@

 cimport numpy as np

+def little_endian():
+    cdef int endian_detector = 1
+    return (<char*>&endian_detector)[0] != 0
+
+if little_endian():
+    my_endian = '<'
+    other_endian = '>'
+else:
+    my_endian = '>'
+    other_endian = '<'
+
 try:
    import numpy as np
    __doc__ = u"""
@@ -130,23 +141,49 @@ try:
    >>> test_dtype(np.int32, inc1_int32_t)
    >>> test_dtype(np.float64, inc1_float64_t)

+    Endian tests:
+    >>> test_dtype('%si' % my_endian, inc1_int)
+    >>> test_dtype('%si' % other_endian, inc1_int)
+    Traceback (most recent call last):
+       ...
+    ValueError: Non-native byte order not supported
+    
+
+
    >>> test_recordarray()
    
-    >>> test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\
+    >>> print(test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\
            ('a', np.dtype('i,i')),\
            ('b', np.dtype('i,i'))\
-        ])))
+        ]))))
    array([((0, 0), (0, 0)), ((1, 2), (1, 4)), ((1, 2), (1, 4))], 
-          dtype=[('a', [('f0', '<i4'), ('f1', '<i4')]), ('b', [('f0', '<i4'), ('f1', '<i4')])])
+          dtype=[('a', [('f0', '!i4'), ('f1', '!i4')]), ('b', [('f0', '!i4'), ('f1', '!i4')])])

-    >>> test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\
+    >>> print(test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\
            ('a', np.dtype('i,f')),\
            ('b', np.dtype('i,i'))\
-        ])))
+        ]))))
    Traceback (most recent call last):
        ...
    ValueError: Buffer dtype mismatch, expected 'int' but got 'float' in 'DoubleInt.y'

+    >>> print(test_packed_align(np.zeros((1,), dtype=np.dtype('b,i', align=False))))
+    array([(22, 23)], 
+          dtype=[('f0', '|i1'), ('f1', '!i4')])
+    >>> print(test_unpacked_align(np.zeros((1,), dtype=np.dtype('b,i', align=True))))
+    array([(22, 23)], 
+          dtype=[('f0', '|i1'), ('', '|V3'), ('f1', '!i4')])
+
+    >>> print(test_packed_align(np.zeros((1,), dtype=np.dtype('b,i', align=True))))
+    Traceback (most recent call last):
+        ...
+    ValueError: Buffer dtype mismatch; next field is at offset 4 but 1 expected
+
+    >>> print(test_unpacked_align(np.zeros((1,), dtype=np.dtype('b,i', align=False))))
+    Traceback (most recent call last):
+        ...
+    ValueError: Buffer dtype mismatch; next field is at offset 1 but 4 expected
+

    >>> test_good_cast()
    True
@@ -300,7 +337,7 @@ def test_nested_dtypes(obj):
    arr[1].b.x = arr[0].a.y + 1
    arr[1].b.y = 4
    arr[2] = arr[1]
-    return arr
+    return repr(arr).replace('<', '!').replace('>', '!')

 def test_bad_nested_dtypes():
    cdef object[BadNestedStruct] arr
@@ -314,3 +351,21 @@ def test_good_cast():
 def test_bad_cast():
    # This should raise an exception
    cdef np.ndarray[long, cast=True] arr = np.array([1], dtype=b'b')
+
+cdef packed struct PackedStruct:
+    char a
+    int b
+
+cdef struct UnpackedStruct:
+    char a
+    int b
+
+def test_packed_align(np.ndarray[PackedStruct] arr):
+    arr[0].a = 22
+    arr[0].b = 23
+    return repr(arr).replace('<', '!').replace('>', '!')
+
+def test_unpacked_align(np.ndarray[UnpackedStruct] arr):
+    arr[0].a = 22
+    arr[0].b = 23    
+    return repr(arr).replace('<', '!').replace('>', '!')