Commit f5134ba8 authored by Mark Florisson

Significantly better performing scalar to slice assignment

parent 7302010f
...@@ -3077,10 +3077,28 @@ class IndexNode(ExprNode): ...@@ -3077,10 +3077,28 @@ class IndexNode(ExprNode):
"memslice[:]" "memslice[:]"
buffer_entry = self.buffer_entry() buffer_entry = self.buffer_entry()
have_gil = not self.in_nogil_context have_gil = not self.in_nogil_context
buffer_entry.generate_buffer_slice_code(code,
self.original_indices, have_slices = False
it = iter(self.indices)
for index in self.original_indices:
is_slice = isinstance(index, SliceNode)
have_slices = have_slices or is_slice
if is_slice:
if not index.start.is_none:
index.start = it.next()
if not index.stop.is_none:
index.stop = it.next()
if not index.step.is_none:
index.step = it.next()
else:
it.next()
assert not list(it)
buffer_entry.generate_buffer_slice_code(code, self.original_indices,
self.result(), self.result(),
have_gil=have_gil) have_gil=have_gil,
have_slices=have_slices)
def generate_memoryviewslice_setslice_code(self, rhs, code): def generate_memoryviewslice_setslice_code(self, rhs, code):
"memslice1[:] = memslice2" "memslice1[:] = memslice2"
......
...@@ -274,7 +274,8 @@ class MemoryViewSliceBufferEntry(Buffer.BufferEntry): ...@@ -274,7 +274,8 @@ class MemoryViewSliceBufferEntry(Buffer.BufferEntry):
return bufp return bufp
def generate_buffer_slice_code(self, code, indices, dst, have_gil): def generate_buffer_slice_code(self, code, indices, dst, have_gil,
have_slices):
""" """
Slice a memoryviewslice. Slice a memoryviewslice.
...@@ -298,12 +299,6 @@ class MemoryViewSliceBufferEntry(Buffer.BufferEntry): ...@@ -298,12 +299,6 @@ class MemoryViewSliceBufferEntry(Buffer.BufferEntry):
all_dimensions_direct = False all_dimensions_direct = False
break break
have_slices = False
for index in indices:
if isinstance(index, ExprNodes.SliceNode):
have_slices = True
break
no_suboffset_dim = all_dimensions_direct and not have_slices no_suboffset_dim = all_dimensions_direct and not have_slices
if not no_suboffset_dim: if not no_suboffset_dim:
suboffset_dim = code.funcstate.allocate_temp( suboffset_dim = code.funcstate.allocate_temp(
...@@ -447,16 +442,115 @@ def copy_broadcast_memview_src_to_dst(src, dst, code): ...@@ -447,16 +442,115 @@ def copy_broadcast_memview_src_to_dst(src, dst, code):
dst.type.dtype.is_pyobject), dst.type.dtype.is_pyobject),
dst.pos)) dst.pos))
def get_1d_fill_scalar_func(type, code):
    """
    Make the "FillStrided1DScalar" C utility available for the element type
    of the memoryview slice type `type` and return the C name of the
    specialised fill function.

    type -- memoryview slice type; only its `dtype` attribute is read
    code -- code writer whose `globalstate` records the utility code
    """
    element_type = type.dtype
    # Both values are substituted into the utility template; the mangled
    # name also forms the suffix of the returned function name.
    template_context = {
        'type_decl': element_type.declaration_code(""),
        'dtype_name': mangle_dtype_name(element_type),
    }
    code.globalstate.use_utility_code(
        load_memview_c_utility("FillStrided1DScalar", template_context))
    return '__pyx_fill_slice_%s' % template_context['dtype_name']
def assign_scalar(dst, scalar, code):
    """
    Generate C code that assigns a scalar to every element of a slice.

    dst    -- the slice node being assigned to; all of its dimensions must
              be direct (checked by verify_direct_dimensions)
    scalar -- node whose result() is the value to store; it is copied into
              a C local declared with the slice's element type
    code   -- code writer receiving the generated C

    If dst's result is not already in a temp (and dst is not a plain
    `name[...]`), the slice is first copied into a local C variable so that
    the generated loops do not re-evaluate the expression.
    """
    verify_direct_dimensions(dst)
    dtype = dst.type.dtype
    type_decl = dtype.declaration_code("")
    slice_decl = dst.type.declaration_code("")

    code.begin_block()
    code.putln("%s __pyx_temp_scalar = %s;" % (type_decl, scalar.result()))
    if dst.result_in_temp() or (dst.base.is_name and
                                isinstance(dst.index, ExprNodes.EllipsisNode)):
        dst_temp = dst.result()
    else:
        code.putln("%s __pyx_temp_slice = %s;" % (slice_decl, dst.result()))
        dst_temp = "__pyx_temp_slice"

    with slice_iter(dst.type, dst_temp, dst.type.ndim, code) as p:
        if dtype.is_pyobject:
            # `p` points at the slot holding the object reference, so it
            # must be dereferenced: release the object currently stored in
            # the slot, not the slot's address.
            code.putln("Py_DECREF(*(PyObject **) %s);" % p)

        code.putln("*((%s *) %s) = __pyx_temp_scalar;" % (type_decl, p))

        if dtype.is_pyobject:
            # Each slot now holds its own reference to the scalar.
            code.putln("Py_INCREF(__pyx_temp_scalar);")

    code.end_block()
def slice_iter(slice_type, slice_temp, ndim, code):
    """
    Return the loop generator suited to `slice_type`: a ContigSliceIter
    when the type is C- or Fortran-contiguous, a StridedSliceIter otherwise.
    All four arguments are passed through to the chosen class unchanged.
    """
    is_contiguous = slice_type.is_c_contig or slice_type.is_f_contig
    iter_class = ContigSliceIter if is_contiguous else StridedSliceIter
    return iter_class(slice_type, slice_temp, ndim, code)
class SliceIter(object):
    """
    Common state for the slice loop generators.

    slice_type -- type of the memoryview slice being iterated
    slice_temp -- C expression (string) naming the slice
    ndim       -- number of dimensions to loop over
    code       -- code writer the loops are emitted to
    """

    def __init__(self, slice_type, slice_temp, ndim, code):
        self.slice_type, self.slice_temp = slice_type, slice_temp
        self.ndim, self.code = ndim, code
class ContigSliceIter(SliceIter):
    """
    Context manager that emits a single flat C loop over all elements of a
    contiguous slice.

    Entering opens a block, declares the total element count (product of
    the shape over all `ndim` dimensions), an index and a typed element
    pointer, and opens the `for` loop. The value bound by `with ... as` is
    the name of that element pointer. Leaving emits the pointer increment
    and closes the loop and the block.
    """

    def __enter__(self):
        code = self.code
        code.begin_block()

        type_decl = self.slice_type.dtype.declaration_code("")

        total_size = ' * '.join("%s.shape[%d]" % (self.slice_temp, i)
                                for i in range(self.ndim))
        code.putln("Py_ssize_t __pyx_temp_extent = %s;" % total_size)
        code.putln("Py_ssize_t __pyx_temp_idx;")
        # The slice's `data` member is not a pointer to the element type
        # (StridedSliceIter stores it in a `char *`), so an explicit cast
        # is needed: the implicit conversion is a constraint violation in
        # C and an error in C++.
        code.putln("%s *__pyx_temp_pointer = (%s *) %s.data;" % (
            type_decl, type_decl, self.slice_temp))
        code.putln("for (__pyx_temp_idx = 0; "
                   "__pyx_temp_idx < __pyx_temp_extent; "
                   "__pyx_temp_idx++) {")

        return "__pyx_temp_pointer"

    def __exit__(self, *args):
        # Advance by one element at the end of each iteration, then close
        # the loop and the block opened in __enter__.
        self.code.putln("__pyx_temp_pointer += 1;")
        self.code.putln("}")
        self.code.end_block()
class StridedSliceIter(SliceIter):
    """
    Context manager that emits one nested C `for` loop per dimension,
    stepping a `char *` through the slice by each dimension's stride.

    The value bound by `with ... as` is the name of the innermost
    dimension's pointer, which addresses the current element.
    """

    def __enter__(self):
        emit = self.code.putln
        dims = range(self.ndim)
        self.code.begin_block()

        # Per-dimension locals: extent, stride, running pointer and index.
        for dim in dims:
            emit("Py_ssize_t __pyx_temp_extent_%d = %s.shape[%d];"
                 % (dim, self.slice_temp, dim))
            emit("Py_ssize_t __pyx_temp_stride_%d = %s.strides[%d];"
                 % (dim, self.slice_temp, dim))
            emit("char *__pyx_temp_pointer_%d;" % dim)
            emit("Py_ssize_t __pyx_temp_idx_%d;" % dim)

        emit("__pyx_temp_pointer_0 = %s.data;" % self.slice_temp)

        for dim in dims:
            if dim:
                # Each inner loop starts from its parent's current position.
                emit("__pyx_temp_pointer_%d = __pyx_temp_pointer_%d;"
                     % (dim, dim - 1))
            emit("for (__pyx_temp_idx_%d = 0; "
                 "__pyx_temp_idx_%d < __pyx_temp_extent_%d; "
                 "__pyx_temp_idx_%d++) {" % ((dim,) * 4))

        return "__pyx_temp_pointer_%d" % (self.ndim - 1)

    def __exit__(self, *args):
        emit = self.code.putln
        # Close the loops innermost first, stepping each dimension's
        # pointer by its stride at the end of the loop body.
        for dim in reversed(range(self.ndim)):
            emit("__pyx_temp_pointer_%d += __pyx_temp_stride_%d;" % (dim, dim))
            emit("}")
        self.code.end_block()
t = (dst.result(), dst.type.ndim,
dtype.declaration_code(""), scalar.result(), dtype.is_pyobject)
code.putln("__pyx_memoryview_slice_assign_scalar("
"&%s, %d, sizeof(%s), &%s, %d);" % t)
def copy_c_or_fortran_cname(memview): def copy_c_or_fortran_cname(memview):
if memview.is_c_contig: if memview.is_c_contig:
......
...@@ -4770,10 +4770,7 @@ class SingleAssignmentNode(AssignmentNode): ...@@ -4770,10 +4770,7 @@ class SingleAssignmentNode(AssignmentNode):
self.rhs.type.is_pyobject)): self.rhs.type.is_pyobject)):
# scalar slice assignment # scalar slice assignment
self.lhs.is_memslice_scalar_assignment = True self.lhs.is_memslice_scalar_assignment = True
#self.lhs = self.lhs.coerce_to_temp(env)
self.lhs.is_temp = True
dtype = self.lhs.type.dtype dtype = self.lhs.type.dtype
use_temp = True
else: else:
dtype = self.lhs.type dtype = self.lhs.type
......
...@@ -1299,29 +1299,33 @@ cdef void refcount_objects_in_slice(char *data, Py_ssize_t *shape, ...@@ -1299,29 +1299,33 @@ cdef void refcount_objects_in_slice(char *data, Py_ssize_t *shape,
# #
### Scalar to slice assignment ### Scalar to slice assignment
# #
@cname('__pyx_memoryview_slice_assign_scalar') @cname('__pyx_memoryview_slice_assign_scalar')
cdef void slice_assign_scalar({{memviewslice_name}} *dst, int ndim, cdef void slice_assign_scalar({{memviewslice_name}} *dst, int ndim,
size_t itemsize, void *item, size_t itemsize, void *item,
bint dtype_is_object) nogil: bint dtype_is_object) nogil:
refcount_copying(dst, dtype_is_object, ndim, False) refcount_copying(dst, dtype_is_object, ndim, False)
_slice_assign_scalar(dst.data, dst.shape, dst.strides, ndim, itemsize, item) _slice_assign_scalar(dst.data, dst.shape, dst.strides, ndim,
itemsize, item)
refcount_copying(dst, dtype_is_object, ndim, True) refcount_copying(dst, dtype_is_object, ndim, True)
@cname('__pyx_memoryview__slice_assign_scalar') @cname('__pyx_memoryview__slice_assign_scalar')
cdef void _slice_assign_scalar(char *data, Py_ssize_t *shape, cdef void _slice_assign_scalar(char *data, Py_ssize_t *shape,
Py_ssize_t *strides, int ndim, Py_ssize_t *strides, int ndim,
size_t itemsize, void *item) nogil: size_t itemsize, void *item) nogil:
cdef Py_ssize_t i cdef Py_ssize_t i
cdef Py_ssize_t stride = strides[0]
cdef Py_ssize_t extent = shape[0]
for i in range(shape[0]): if ndim == 1:
if ndim == 1: for i in range(extent):
memcpy(data, item, itemsize) memcpy(data, item, itemsize)
else: data += stride
else:
for i in range(extent):
_slice_assign_scalar(data, shape + 1, strides + 1, _slice_assign_scalar(data, shape + 1, strides + 1,
ndim - 1, itemsize, item) ndim - 1, itemsize, item)
data += stride
data += strides[0]
############### BufferFormatFromTypeInfo ############### ############### BufferFormatFromTypeInfo ###############
......
...@@ -259,7 +259,10 @@ static int __Pyx_ValidateAndInit_memviewslice( ...@@ -259,7 +259,10 @@ static int __Pyx_ValidateAndInit_memviewslice(
} }
if (spec & (__Pyx_MEMVIEW_STRIDED | __Pyx_MEMVIEW_FOLLOW)) { if (spec & (__Pyx_MEMVIEW_STRIDED | __Pyx_MEMVIEW_FOLLOW)) {
if (buf->strides[i] < buf->itemsize) { Py_ssize_t stride = buf->strides[i];
if (stride < 0)
stride = -stride;
if (stride < buf->itemsize) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Buffer and memoryview are not contiguous in the same dimension."); "Buffer and memoryview are not contiguous in the same dimension.");
goto fail; goto fail;
...@@ -801,3 +804,27 @@ if (unlikely(__pyx_memoryview_slice_memviewslice( ...@@ -801,3 +804,27 @@ if (unlikely(__pyx_memoryview_slice_memviewslice(
{{endif}} {{endif}}
} }
////////// FillStrided1DScalar.proto //////////
static void
__pyx_fill_slice_{{dtype_name}}({{type_decl}} *p, Py_ssize_t extent, Py_ssize_t stride,
size_t itemsize, void *itemp);
////////// FillStrided1DScalar //////////

/* Fill a slice with a scalar value. The dimension is direct and strided or contiguous.
 *
 * p        - first element to write
 * extent   - number of elements to write
 * stride   - distance between consecutive elements in bytes; may be negative
 * itemsize - unused; the element size is taken from the element type
 * itemp    - pointer to the scalar value to copy into each element
 */
static void
__pyx_fill_slice_{{dtype_name}}({{type_decl}} *p, Py_ssize_t extent, Py_ssize_t stride,
                                size_t itemsize, void *itemp)
{
    Py_ssize_t i;
    {{type_decl}} item = *(({{type_decl}} *) itemp);

    (void) itemsize;

    /* Convert the byte stride to an element stride. The cast keeps the
       division signed; dividing by an unsigned size_t would turn a
       negative stride into a huge positive value. */
    stride /= (Py_ssize_t) sizeof({{type_decl}});

    /* Count iterations instead of comparing against an end pointer, so
       that negative and zero strides still write exactly `extent` items. */
    for (i = 0; i < extent; i++) {
        *p = item;
        p += stride;
    }
}
...@@ -1969,17 +1969,68 @@ def test_scalar_slice_assignment(): ...@@ -1969,17 +1969,68 @@ def test_scalar_slice_assignment():
cdef int[10] a cdef int[10] a
cdef int[:] m = a cdef int[:] m = a
_test_scalar_slice_assignment(m) cdef int a2[5][10]
cdef int[:, ::1] m2 = a2
_test_scalar_slice_assignment(m, m2)
print print
_test_scalar_slice_assignment(<object> m) _test_scalar_slice_assignment(<object> m, <object> m2)
cdef _test_scalar_slice_assignment(slice_1d m, slice_2d m2):
    # Exercises scalar-to-slice assignment on a 1D slice (results are
    # printed for the caller's doctest) and on a 2D slice (results are
    # checked with assertions).
    cdef int i, j
    for i in range(10):
        m[i] = i

    # Negative-step slice assignment on the 1D slice.
    m[-2:0:-2] = 6
    for i in range(10):
        print m[i]

    for i in range(m2.shape[0]):
        for j in range(m2.shape[1]):
            m2[i, j] = i * m2.shape[1] + j

    cdef int x = 2, y = -2
    cdef long value = 1
    # Four strided assignments with mixed step signs; `value` is a long
    # being stored through the 2D slice.
    m2[::2, ::-1] = value
    m2[-2::-2, ::-1] = 2
    m2[::2, -2::-2] = 0
    m2[-2::-2, -2::-2] = 0

    cdef int[:, :] s = m2[..., 1::2]
    for i in range(s.shape[0]):
        for j in range(s.shape[1]):
            assert s[i, j] == i % 2 + 1, (s[i, j], i)

    s = m2[::2, 1::2]
    for i in range(s.shape[0]):
        for j in range(s.shape[1]):
            assert s[i, j] == 1, s[i, j]

    s = m2[1::2, ::2]
    for i in range(s.shape[0]):
        for j in range(s.shape[1]):
            assert s[i, j] == 0, s[i, j]

    m2[...] = 3
    for i in range(m2.shape[0]):
        for j in range(m2.shape[1]):
            # Report the element actually being checked; `s` is a smaller
            # view at this point and (i, j) need not be valid indices into it.
            assert m2[i, j] == 3, m2[i, j]
@testcase
def test_contig_scalar_to_slice_assignment():
    """
    >>> test_contig_scalar_to_slice_assignment()
    14 14 14 14
    20 20 20 20
    """
    # 2D view over a local 5x10 int array; the last dimension is declared
    # contiguous (`::1`).
    cdef int a[5][10]
    cdef int[:, ::1] m = a

    # Scalar assignment through an Ellipsis index, spot-checked at four
    # positions including both corners.
    m[...] = 14
    print m[0, 0], m[-1, -1], m[3, 2], m[4, 9]

    # Same check with explicit full slices in both dimensions.
    m[:, :] = 20
    print m[0, 0], m[-1, -1], m[3, 2], m[4, 9]
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment