Commit ca91ec9b authored by Mark Florisson's avatar Mark Florisson

slice assignment broadcasting & fix some bugs

parent 017b73ae
......@@ -1694,7 +1694,7 @@ class NameNode(AtomicExprNode):
code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.py_result())
elif entry.is_local or entry.in_closure or entry.from_closure:
elif entry.is_local or entry.in_closure or entry.from_closure or entry.type.is_memoryviewslice:
# Raise UnboundLocalError for objects and memoryviewslices
raise_unbound = (
(self.cf_maybe_null or self.cf_is_null) and not self.allow_null)
......
......@@ -431,57 +431,20 @@ def verify_direct_dimensions(node):
for access, packing in node.type.axes:
if access != 'direct':
error(self.pos, "All dimensions must be direct")
return False
return True
def broadcast(src, dst, src_temp, dst_temp, code):
"Perform an in-place broadcast of slices src and dst"
if src.type.ndim != dst.type.ndim:
code.putln("__pyx_memoryview_broadcast_inplace(&%s, &%s, %d, %d);" % (
src_temp, dst_temp, src.type.ndim, dst.type.ndim))
return max(src.type.ndim, dst.type.ndim)
return src.type.ndim
def copy_broadcast_memview_src_to_dst_inplace(src, dst, src_temp, dst_temp, code):
def copy_broadcast_memview_src_to_dst(src, dst, code):
"""
It is hard to check for overlapping memory with indirect slices,
so we currently don't support them.
Copy the contents of slice src to slice dst. Does not support indirect
slices.
"""
if not verify_direct_dimensions(src): return
if not verify_direct_dimensions(dst): return
ndim = broadcast(src, dst, src_temp, dst_temp, code)
call = "%s(&%s, &%s, %d)" % (copy_src_to_dst_cname(),
src_temp, dst_temp, ndim)
code.putln(code.error_goto_if_neg(call, dst.pos))
def copy_broadcast_memview_src_to_dst(src, dst, code):
# Note: do not use code.funcstate.allocate_temp to allocate temps, as
# temps will be acquisition counted (so we would need new
# references, as any sudden exception would cause a jump leading to
# a decref before we can nullify our slice)
src_tmp = None
dst_tmp = None
code.begin_block()
if src.type.ndim < dst.type.ndim and not src.result_in_temp():
src_tmp = '__pyx_slice_tmp1'
code.putln("%s %s = %s;" % (memviewslice_cname, src_tmp, src.result()))
if dst.type.ndim < src.type.ndim and not dst.result_in_temp():
dst_tmp = '__pyx+_slice_tmp2'
code.putln("%s %s = %s;" % (memviewslice_cname, dst_tmp, dst.result()))
copy_broadcast_memview_src_to_dst_inplace(src, dst,
src_tmp or src.result(),
dst_tmp or dst.result(),
code)
verify_direct_dimensions(src)
verify_direct_dimensions(dst)
code.end_block()
code.putln(code.error_goto_if_neg(
"%s(%s, %s, %d, %d)" % (copy_src_to_dst_cname(),
src.result(), dst.result(),
src.type.ndim, dst.type.ndim),
dst.pos))
def copy_c_or_fortran_cname(memview):
if memview.is_c_contig:
......@@ -791,7 +754,8 @@ def load_memview_c_utility(util_code_name, context=None, **kwargs):
context=context, **kwargs)
def use_cython_array_utility_code(env):
env.global_scope().context.cython_scope.lookup('array_cwrapper').used = True
scope = env.global_scope().context.cython_scope
scope.lookup('array_cwrapper').used = True
env.use_utility_code(cython_array_utility_code)
context = {
......
......@@ -19,6 +19,9 @@ cdef extern from "Python.h":
cdef extern from *:
object __pyx_memoryview_new(object obj, int flags)
Py_ssize_t fill_contig_strides_array "__pyx_fill_contig_strides_array" (
Py_ssize_t *shape, Py_ssize_t *strides, Py_ssize_t stride,
int ndim, char order) nogil
@cname("__pyx_array")
cdef class array:
......@@ -73,23 +76,17 @@ cdef class array:
self._shape[idx] = dim
idx += 1
stride = itemsize
if mode == "fortran":
idx = 0
for dim in shape:
self._strides[idx] = stride
stride = stride * dim
idx += 1
elif mode == "c":
idx = self.ndim-1
for dim in shape[::-1]:
self._strides[idx] = stride
stride = stride * dim
idx -= 1
else:
if mode not in ("fortran", "c"):
raise ValueError("Invalid mode, expected 'c' or 'fortran', got %s" % mode)
self.len = stride
cdef char order
if mode == 'fortran':
order = 'F'
else:
order = 'C'
self.len = fill_contig_strides_array(self._shape, self._strides,
itemsize, self.ndim, order)
decode = getattr(mode, 'decode', None)
if decode:
......@@ -179,6 +176,7 @@ cdef array array_cwrapper(tuple shape, Py_ssize_t itemsize, char *format, char *
return result
#################### View.MemoryView ####################
import cython
......@@ -251,6 +249,7 @@ cdef extern from *:
int ndim, size_t itemsize) nogil
cdef extern from "stdlib.h":
void *malloc(size_t) nogil
void free(void *) nogil
......@@ -371,14 +370,9 @@ cdef class memoryview(object):
cdef {{memviewslice_name}} src_slice
dst = self[index]
get_slice_from_memview(dst, &dst_slice)
slice_copy(src, &src_slice)
if dst.ndim != src.ndim:
broadcast_inplace(&src_slice, &dst_slice, src.ndim, dst.ndim)
memoryview_copy_contents(&src_slice, &dst_slice,
max(src.ndim, dst.ndim))
memoryview_copy_contents(get_slice_from_memview(src, &src_slice)[0],
get_slice_from_memview(dst, &dst_slice)[0],
src.ndim, dst.ndim)
cdef setitem_slice_assign_scalar(self, index, value):
raise ValueError("Scalar assignment currently unsupported")
......@@ -990,99 +984,57 @@ cdef char get_best_order({{memviewslice_name}} *mslice, int ndim) nogil:
cdef Py_ssize_t c_stride = 0
cdef Py_ssize_t f_stride = 0
for i in range(ndim -1, -1, -1):
for i in range(ndim - 1, -1, -1):
if mslice.shape[i] > 1:
c_stride = mslice.strides[i]
break
for i in range(ndim):
if mslice.shape[i] > 1:
f_stride = mslice.strides[i]
break
if abs_py_ssize_t(c_stride) <= abs_py_ssize_t(f_stride):
return 'C'
else:
return 'F'
cdef void _copy_strided_to_strided(char *src_data, Py_ssize_t *strides1,
char *dst_data, Py_ssize_t *strides2,
Py_ssize_t *shape, int ndim, size_t itemsize) nogil:
cdef Py_ssize_t i, extent, stride1, stride2
extent = shape[0]
stride1 = strides1[0]
stride2 = strides2[0]
@cython.cdivision(True)
cdef void _copy_strided_to_strided(char *src_data, Py_ssize_t *src_strides,
char *dst_data, Py_ssize_t *dst_strides,
Py_ssize_t *src_shape, Py_ssize_t *dst_shape,
int ndim, size_t itemsize) nogil:
# Note: src_extent is 1 if we're broadcasting
# dst_extent always >= src_extent as we don't do reductions
cdef Py_ssize_t i
cdef Py_ssize_t src_extent = src_shape[0]
cdef Py_ssize_t dst_extent = dst_shape[0]
cdef Py_ssize_t src_stride = src_strides[0]
cdef Py_ssize_t dst_stride = dst_strides[0]
if ndim == 1:
if stride1 > 0 and stride2 > 0 and <size_t> stride1 == itemsize == <size_t> stride2:
memcpy(dst_data, src_data, itemsize * extent)
if (src_stride > 0 and dst_stride > 0 and
<size_t> src_stride == itemsize == <size_t> dst_stride):
memcpy(dst_data, src_data, itemsize * dst_extent)
else:
for i in range(extent):
for i in range(dst_extent):
memcpy(dst_data, src_data, itemsize)
src_data += stride1
dst_data += stride2
src_data += src_stride
dst_data += dst_stride
else:
for i in range(extent):
_copy_strided_to_strided(src_data, strides1 + 1,
dst_data, strides2 + 1,
shape + 1, ndim - 1, itemsize)
src_data += stride1
dst_data += stride2
for i in range(dst_extent):
_copy_strided_to_strided(src_data, src_strides + 1,
dst_data, dst_strides + 1,
src_shape + 1, dst_shape + 1,
ndim - 1, itemsize)
src_data += src_stride
dst_data += dst_stride
cdef void copy_strided_to_strided({{memviewslice_name}} *src,
{{memviewslice_name}} *dst,
int ndim, size_t itemsize) nogil:
_copy_strided_to_strided(src.data, src.strides, dst.data, dst.strides,
src.shape, ndim, itemsize)
{{for strided_to_contig in (True, False)}}
{{if strided_to_contig}}
{{py: func_name = "copy_strided_to_contig"}}
{{else}}
{{py: func_name = "copy_contig_to_strided"}}
{{endif}}
@cname('__pyx_{{func_name}}')
cdef char *{{func_name}}(char *strided, char *contig, Py_ssize_t *shape,
Py_ssize_t *strides, int ndim, size_t itemsize) nogil:
"""
Copy contiguous data to strided memory, or strided memory to contiguous data.
The shape and strides are given only for the strided data.
"""
cdef Py_ssize_t i, extent, stride
cdef void *src, *dst
stride = strides[0]
extent = shape[0]
{{if strided_to_contig}}
src = strided
dst = contig
{{else}}
src = contig
dst = strided
{{endif}}
if ndim == 1:
# inner dimension, copy data
if stride > 0 and <size_t> stride == itemsize:
memcpy(dst, src, itemsize * extent)
contig += itemsize * extent
else:
for i in range(extent):
{{if strided_to_contig}}
memcpy(contig, strided, itemsize)
{{else}}
memcpy(strided, contig, itemsize)
{{endif}}
contig += itemsize
strided += stride
else:
for i in range(extent):
contig = {{func_name}}(strided, contig, shape + 1, strides + 1,
ndim - 1, itemsize)
strided += stride
return contig
{{endfor}}
src.shape, dst.shape, ndim, itemsize)
@cname('__pyx_memoryview_slice_get_size')
cdef Py_ssize_t slice_get_size({{memviewslice_name}} *src, int ndim) nogil:
......@@ -1095,8 +1047,29 @@ cdef Py_ssize_t slice_get_size({{memviewslice_name}} *src, int ndim) nogil:
return size
@cname('__pyx_fill_contig_strides_array')
cdef Py_ssize_t fill_contig_strides_array(
Py_ssize_t *shape, Py_ssize_t *strides, Py_ssize_t stride,
int ndim, char order) nogil:
"Fill the strides array for a slice with C or F contiguous strides"
cdef int idx
if order == 'F':
for idx in range(ndim):
strides[idx] = stride
stride = stride * shape[idx]
else:
for idx in range(ndim - 1, -1, -1):
strides[idx] = stride
stride = stride * shape[idx]
return stride
@cname('__pyx_memoryview_copy_data_to_temp')
cdef void *copy_data_to_temp({{memviewslice_name}} *src, char order, int ndim) nogil except NULL:
cdef void *copy_data_to_temp({{memviewslice_name}} *src,
{{memviewslice_name}} *tmpslice,
char order,
int ndim) nogil except NULL:
"""
Copy a direct slice to temporary contiguous memory. The caller should free
the result when done.
......@@ -1112,100 +1085,103 @@ cdef void *copy_data_to_temp({{memviewslice_name}} *src, char order, int ndim) n
with gil:
raise MemoryError
# tmpslice[0] = src
tmpslice.data = <char *> result
tmpslice.memview = src.memview
for i in range(ndim):
tmpslice.shape[i] = src.shape[i]
tmpslice.suboffsets[i] = -1
fill_contig_strides_array(&tmpslice.shape[0], &tmpslice.strides[0], itemsize,
ndim, order)
# We need to broadcast strides again
for i in range(ndim):
if tmpslice.shape[i] == 1:
tmpslice.strides[i] = 0
if slice_is_contig(src, order, ndim):
memcpy(result, src.data, size)
else:
copy_strided_to_contig(src.data, <char *> result, src.shape, src.strides,
ndim, itemsize)
copy_strided_to_strided(src, tmpslice, ndim, itemsize)
return result
@cname('__pyx_memoryview_copy_contents')
cdef int memoryview_copy_contents({{memviewslice_name}} *src,
{{memviewslice_name}} *dst,
int ndim) nogil except -1:
cdef int memoryview_copy_contents({{memviewslice_name}} src,
{{memviewslice_name}} dst,
int src_ndim, int dst_ndim) nogil except -1:
"""
Copy memory from slice src to slice dst.
Check for overlapping memory and verify the shapes.
This function DOES NOT verify ndim and itemsize (either the compiler
or the caller should do that)
"""
cdef void *tmpdata
cdef void *tmpdata = NULL
cdef size_t itemsize = src.memview.view.itemsize
cdef int i, direct_copy
cdef char order = get_best_order(src, ndim)
cdef {{memviewslice_name}} src_copy, dst_copy
cdef int i
cdef char order = get_best_order(&src, src_ndim)
cdef bint broadcasting = False
cdef bint direct_copy = False
cdef {{memviewslice_name}} tmp
if src_ndim < dst_ndim:
broadcast_leading(&src, src_ndim, dst_ndim)
elif dst_ndim < src_ndim:
broadcast_leading(&dst, dst_ndim, src_ndim)
cdef int ndim = max(src_ndim, dst_ndim)
for i in range(ndim):
if src.shape[i] != dst.shape[i]:
if src.shape[i] == 1:
broadcasting = True
src.strides[i] = 0
else:
with gil:
raise ValueError(
"memoryview shapes are not the same in dimension %d "
"got differing extents in dimension %d "
"(got %d and %d)" % (i, dst.shape[i], src.shape[i]))
if slices_overlap(src, dst, ndim, itemsize):
if src.suboffsets[i] >= 0:
with gil:
raise ValueError("Dimension %d is not direct" % i)
if slices_overlap(&src, &dst, ndim, itemsize):
# slices overlap, copy to temp, copy temp to dst
if not slice_is_contig(src, order, ndim):
order = get_best_order(dst, ndim)
if not slice_is_contig(&src, order, ndim):
order = get_best_order(&dst, ndim)
tmpdata = copy_data_to_temp(src, order, ndim)
copy_contig_to_strided(dst.data, <char *> tmpdata, dst.shape,
dst.strides, ndim, itemsize)
free(tmpdata)
return 0
tmpdata = copy_data_to_temp(&src, &tmp, order, ndim)
src = tmp
# See if both slices have equal contiguity
if slice_is_contig(src, 'C', ndim):
direct_copy = slice_is_contig(dst, 'C', ndim)
elif slice_is_contig(src, 'F', ndim):
direct_copy = slice_is_contig(dst, 'F', ndim)
else:
direct_copy = False
if not broadcasting:
# See if both slices have equal contiguity, in that case perform a
# direct copy. This only works when we are not broadcasting.
if slice_is_contig(&src, 'C', ndim):
direct_copy = slice_is_contig(&dst, 'C', ndim)
elif slice_is_contig(&src, 'F', ndim):
direct_copy = slice_is_contig(&dst, 'F', ndim)
if direct_copy:
# Contiguous slices with same order
memcpy(dst.data, src.data, slice_get_size(src, ndim))
memcpy(dst.data, src.data, slice_get_size(&src, ndim))
return 0
# Slices are not overlapping and not contiguous
if order == 'F' == get_best_order(dst, ndim):
if order == 'F' == get_best_order(&dst, ndim):
# see if both slices have Fortran order, transpose them to match our
# C-style indexing order
if src != &src_copy:
src_copy = src[0]
src = &src_copy
transpose_memslice(&src)
transpose_memslice(&dst)
if dst != &dst_copy:
dst_copy = dst[0]
dst = &dst_copy
transpose_memslice(src)
transpose_memslice(dst)
copy_strided_to_strided(src, dst, ndim, itemsize)
copy_strided_to_strided(&src, &dst, ndim, itemsize)
free(tmpdata)
return 0
@cname('__pyx_memoryview_broadcast_inplace')
cdef void broadcast_inplace({{memviewslice_name}} *slice1,
{{memviewslice_name}} *slice2,
int ndim1,
int ndim2) nogil:
"""
Broadcast the slice with the least dimensions to prepend empty
dimensions.
"""
@cname('__pyx_memoryview_broadcast_leading')
cdef void broadcast_leading({{memviewslice_name}} *slice,
int ndim,
int ndim_other) nogil:
cdef int i
cdef int offset = ndim1 - ndim2
cdef int ndim
cdef {{memviewslice_name}} *slice
if offset < 0:
slice = slice1
offset = -offset
ndim = ndim1
else:
slice = slice2
ndim = ndim2
cdef int offset = ndim_other - ndim
for i in range(ndim - 1, -1, -1):
slice.shape[i + offset] = slice.shape[i]
......@@ -1217,6 +1193,7 @@ cdef void broadcast_inplace({{memviewslice_name}} *slice1,
slice.strides[i] = slice.strides[0]
slice.suboffsets[i] = -1
############### BufferFormatFromTypeInfo ###############
cdef extern from *:
ctypedef struct __Pyx_StructField
......
......@@ -18,12 +18,12 @@ typedef struct {
#define CYTHON_ATOMICS 1
#endif
#define __pyx_atomic_int_type int
/* todo: Portland pgcc, maybe OS X's OSAtomicIncrement32,
libatomic + autotools-like distutils support? Such a pain... */
#if CYTHON_ATOMICS && __GNUC__ >= 4 && (__GNUC_MINOR__ > 1 || \
(__GNUC_MINOR__ == 1 && __GNUC_PATHLEVEL >= 2))
/* gcc >= 4.1.2 */
typedef volatile int __pyx_atomic_int;
#define __pyx_atomic_incr_aligned(value, lock) __sync_fetch_and_add(value, 1)
#define __pyx_atomic_decr_aligned(value, lock) __sync_fetch_and_sub(value, 1)
......@@ -33,7 +33,7 @@ typedef struct {
#elif CYTHON_ATOMICS && MSC_VER
/* msvc */
#include <Windows.h>
typedef volatile LONG __pyx_atomic_int;
#define __pyx_atomic_int_type LONG
#define __pyx_atomic_incr_aligned(value, lock) InterlockedIncrement(value)
#define __pyx_atomic_decr_aligned(value, lock) InterlockedDecrement(value)
......@@ -41,7 +41,6 @@ typedef struct {
#warning "Using MSVC atomics"
#endif
#elif CYTHON_ATOMICS && (defined(__ICC) || defined(__INTEL_COMPILER))
typedef volatile int __pyx_atomic_int;
#define __pyx_atomic_incr_aligned(value, lock) _InterlockedIncrement(value)
#define __pyx_atomic_decr_aligned(value, lock) _InterlockedDecrement(value)
......@@ -49,20 +48,20 @@ typedef struct {
#warning "Using Intel atomics"
#endif
#else
typedef volatile int __pyx_atomic_int;
#define CYTHON_ATOMICS 0
#ifdef __PYX_DEBUG_ATOMICS
#warning "Not using atomics"
#endif
#endif
typedef volatile __pyx_atomic_int_type __pyx_atomic_int;
#if CYTHON_ATOMICS
__pyx_atomic_int CYTHON_INLINE
static CYTHON_INLINE __pyx_atomic_int_type
__pyx_atomic_incr_maybealigned(__pyx_atomic_int *value, PyThread_type_lock lock);
__pyx_atomic_int CYTHON_INLINE
static CYTHON_INLINE __pyx_atomic_int_type
__pyx_atomic_decr_maybealigned(__pyx_atomic_int *value, PyThread_type_lock lock);
#define __pyx_add_acquisition_count(memview) \
......@@ -82,7 +81,7 @@ typedef struct {
#define __pyx_check_unaligned(type, pointer) \
(((type) pointer) & (sizeof(pointer) - 1))
int CYTHON_INLINE
static CYTHON_INLINE int
__pyx_atomic_unaligned(__pyx_atomic_int *p)
{
/* uintptr_t is optional in C99, try other stuff */
......@@ -99,7 +98,8 @@ __pyx_atomic_unaligned(__pyx_atomic_int *p)
return 1;
}
__pyx_atomic_int CYTHON_INLINE
static CYTHON_INLINE __pyx_atomic_int_type
__pyx_atomic_incr_maybealigned(__pyx_atomic_int *value, PyThread_type_lock lock)
{
if (unlikely(__pyx_atomic_unaligned(value)))
......@@ -108,7 +108,7 @@ __pyx_atomic_incr_maybealigned(__pyx_atomic_int *value, PyThread_type_lock lock)
return __pyx_atomic_incr_aligned(value, lock);
}
__pyx_atomic_int CYTHON_INLINE
static CYTHON_INLINE __pyx_atomic_int_type
__pyx_atomic_decr_maybealigned(__pyx_atomic_int *value, PyThread_type_lock lock)
{
if (unlikely(__pyx_atomic_unaligned(value)))
......@@ -522,8 +522,7 @@ __pyx_memoryview_copy_new_contig(const __Pyx_memviewslice *from_mvs,
if (unlikely(__Pyx_init_memviewslice(memview_obj, ndim, &new_mvs) < 0))
goto fail;
if (unlikely(__pyx_memoryview_copy_contents(
({{memviewslice_name}} *) from_mvs, &new_mvs, ndim) < 0))
if (unlikely(__pyx_memoryview_copy_contents(*from_mvs, new_mvs, ndim, ndim) < 0))
goto fail;
goto no_fail;
......
......@@ -171,10 +171,10 @@ def test_copy_mismatch():
>>> test_copy_mismatch()
Traceback (most recent call last):
...
ValueError: memoryview shapes are not the same in dimension 0 (got 2 and 1)
ValueError: got differing extents in dimension 0 (got 2 and 3)
'''
cdef int[:,:,::1] mv1 = array((2,2,3), sizeof(int), 'i')
cdef int[:,:,::1] mv2 = array((1,2,3), sizeof(int), 'i')
cdef int[:,:,::1] mv2 = array((3,2,3), sizeof(int), 'i')
mv1[...] = mv2
......
......@@ -1747,7 +1747,7 @@ def test_slice_assignment():
for i in range(10):
for j in range(100):
carray[i][j] = i * 10 + j
carray[i][j] = i * 100 + j
cdef int[:, :] m = carray
cdef int[:, :] copy = m[-6:-1, 60:65].copy()
......@@ -1758,12 +1758,12 @@ def test_slice_assignment():
for i in range(5):
for j in range(5):
assert copy[i, j] == m[-5 + i, -5 + j]
assert copy[i, j] == m[-5 + i, -5 + j], (copy[i, j], m[-5 + i, -5 + j])
@testcase
def test_slice_assignment_broadcast_leading_dimensions():
def test_slice_assignment_broadcast_leading():
"""
>>> test_slice_assignment_broadcast_leading_dimensions()
>>> test_slice_assignment_broadcast_leading()
"""
cdef int array1[1][10]
cdef int array2[10]
......@@ -1780,4 +1780,36 @@ def test_slice_assignment_broadcast_leading_dimensions():
a[:, :] = b[:]
for i in range(10):
assert a[0, i] == b[i] == 10 - 1 - i
assert a[0, i] == b[i] == 10 - 1 - i, (b[i], a[0, i])
@testcase
def test_slice_assignment_broadcast_strides():
"""
>>> test_slice_assignment_broadcast_strides()
"""
cdef int src_array[10]
cdef int dst_array[10][5]
cdef int i, j
for i in range(10):
src_array[i] = 10 - 1 - i
cdef int[:] src = src_array
cdef int[:, :] dst = dst_array
cdef int[:, :] dst_f = dst.copy_fortran()
dst[1:] = src[-1:-6:-1]
dst_f[1:] = src[-1:-6:-1]
for i in range(1, 10):
for j in range(1, 5):
assert dst[i, j] == dst_f[i, j] == j, (dst[i, j], dst_f[i, j], j)
# test overlapping memory with broadcasting
dst[:, 1:4] = dst[1, :3]
dst_f[:, 1:4] = dst[1, 1:4]
for i in range(10):
for j in range(1, 3):
assert dst[i, j] == dst_f[i, j] == j - 1, (dst[i, j], dst_f[i, j], j - 1)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment