Merge branch 'master' of git+ssh://github.com/cython/cython

20ff6c2a · Stefan Behnel · 32290bf8 · 8ccdda6d · 20ff6c2a · 20ff6c2a
Commit 20ff6c2a authored Nov 24, 2017 by Stefan Behnel
16 changed files
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -36,6 +36,9 @@ Features added
 * Some PEP-484/526 container type declarations are now considered for
  loop optimisations.

+* Indexing into memoryview slices with ``view[i][j]`` is now optimised into
+  ``view[i, j]``.
+
 * Python compatible ``cython.*`` types can now be mixed with type declarations
  in Cython syntax.


--- a/Cython/Compiler/Code.py
+++ b/Cython/Compiler/Code.py
@@ -80,6 +80,82 @@ modifier_output_mapper = {
 is_self_assignment = re.compile(r" *(\w+) = (\1);\s*$").match


+class IncludeCode(object):
+    """
+    An include file and/or verbatim C code to be included in the
+    generated sources.
+    """
+    # attributes:
+    #
+    #  pieces    {order: unicode}: pieces of C code to be generated.
+    #            For the included file, the key "order" is zero.
+    #            For verbatim include code, the "order" is the "order"
+    #            attribute of the original IncludeCode where this piece
+    #            of C code was first added. This is needed to prevent
+    #            duplication if the same include code is found through
+    #            multiple cimports.
+    #  location  int: where to put this include in the C sources, one
+    #            of the constants INITIAL, EARLY, LATE
+    #  order     int: sorting order (automatically set by increasing counter)
+
+    # Constants for location. If the same include occurs with different
+    # locations, the earliest one takes precedence.
+    INITIAL = 0
+    EARLY = 1
+    LATE = 2
+
+    counter = 1   # Counter for "order"
+
+    def __init__(self, include=None, verbatim=None, late=True, initial=False):
+        self.order = self.counter
+        type(self).counter += 1
+        self.pieces = {}
+
+        if include:
+            if include[0] == '<' and include[-1] == '>':
+                self.pieces[0] = u'#include {0}'.format(include)
+                late = False  # system include is never late
+            else:
+                self.pieces[0] = u'#include "{0}"'.format(include)
+
+        if verbatim:
+            self.pieces[self.order] = verbatim
+
+        if initial:
+            self.location = self.INITIAL
+        elif late:
+            self.location = self.LATE
+        else:
+            self.location = self.EARLY
+
+    def dict_update(self, d, key):
+        """
+        Insert `self` in dict `d` with key `key`. If that key already
+        exists, update the attributes of the existing value with `self`.
+        """
+        if key in d:
+            other = d[key]
+            other.location = min(self.location, other.location)
+            other.pieces.update(self.pieces)
+        else:
+            d[key] = self
+
+    def sortkey(self):
+        return self.order
+
+    def mainpiece(self):
+        """
+        Return the main piece of C code, corresponding to the include
+        file. If there was no include file, return None.
+        """
+        return self.pieces.get(0)
+
+    def write(self, code):
+        # Write values of self.pieces dict, sorted by the keys
+        for k in sorted(self.pieces):
+            code.putln(self.pieces[k])
+
+
 def get_utility_dir():
    # make this a function and not global variables:
    # http://trac.cython.org/cython_trac/ticket/475

--- a/Cython/Compiler/ExprNodes.py
+++ b/Cython/Compiler/ExprNodes.py
@@ -3690,23 +3690,33 @@ class IndexNode(_IndexingBaseNode):
        else:
            indices = [self.index]

-        base_type = self.base.type
+        base = self.base
+        base_type = base.type
        replacement_node = None
        if base_type.is_memoryviewslice:
            # memoryviewslice indexing or slicing
            from . import MemoryView
+            if base.is_memview_slice:
+                # For memory views, "view[i][j]" is the same as "view[i, j]" => use the latter for speed.
+                merged_indices = base.merged_indices(indices)
+                if merged_indices is not None:
+                    base = base.base
+                    base_type = base.type
+                    indices = merged_indices
            have_slices, indices, newaxes = MemoryView.unellipsify(indices, base_type.ndim)
            if have_slices:
-                replacement_node = MemoryViewSliceNode(self.pos, indices=indices, base=self.base)
+                replacement_node = MemoryViewSliceNode(self.pos, indices=indices, base=base)
            else:
-                replacement_node = MemoryViewIndexNode(self.pos, indices=indices, base=self.base)
+                replacement_node = MemoryViewIndexNode(self.pos, indices=indices, base=base)
        elif base_type.is_buffer or base_type.is_pythran_expr:
            if base_type.is_pythran_expr or len(indices) == base_type.ndim:
                # Buffer indexing
                is_buffer_access = True
                indices = [index.analyse_types(env) for index in indices]
                if base_type.is_pythran_expr:
-                    do_replacement = all(index.type.is_int or index.is_slice or index.type.is_pythran_expr for index in indices)
+                    do_replacement = all(
+                        index.type.is_int or index.is_slice or index.type.is_pythran_expr
+                        for index in indices)
                    if do_replacement:
                        for i,index in enumerate(indices):
                            if index.is_slice:
@@ -3716,7 +3726,7 @@ class IndexNode(_IndexingBaseNode):
                else:
                    do_replacement = all(index.type.is_int for index in indices)
                if do_replacement:
-                    replacement_node = BufferIndexNode(self.pos, indices=indices, base=self.base)
+                    replacement_node = BufferIndexNode(self.pos, indices=indices, base=base)
                    # On cloning, indices is cloned. Otherwise, unpack index into indices.
                    assert not isinstance(self.index, CloneNode)

@@ -4425,6 +4435,37 @@ class MemoryViewSliceNode(MemoryViewIndexNode):
        else:
            return MemoryCopySlice(self.pos, self)

+    def merged_indices(self, indices):
+        """Return a new list of indices/slices with 'indices' merged into the current ones
+        according to slicing rules.
+        This is used to implement "view[i][j]" => "view[i, j]".
+        Return None if the indices cannot (easily) be merged at compile time.
+        """
+        if not indices:
+            return None
+        # NOTE: Need to evaluate "self.original_indices" here as they might differ from "self.indices".
+        new_indices = self.original_indices[:]
+        indices = indices[:]
+        for i, s in enumerate(self.original_indices):
+            if s.is_slice:
+                if s.start.is_none and s.stop.is_none and s.step.is_none:
+                    # Full slice found, replace by index.
+                    new_indices[i] = indices[0]
+                    indices.pop(0)
+                    if not indices:
+                        return new_indices
+                else:
+                    # Found something non-trivial, e.g. a partial slice.
+                    return None
+            elif not s.type.is_int:
+                # Not a slice, not an integer index => could be anything...
+                return None
+        if indices:
+            if len(new_indices) + len(indices) > self.base.type.ndim:
+                return None
+            new_indices += indices
+        return new_indices
+
    def is_simple(self):
        if self.is_ellipsis_noop:
            # TODO: fix SimpleCallNode.is_simple()

--- a/Cython/Compiler/ModuleNode.py
+++ b/Cython/Compiler/ModuleNode.py
@@ -28,7 +28,7 @@ from . import Pythran
 from .Errors import error, warning
 from .PyrexTypes import py_object_type
 from ..Utils import open_new_file, replace_suffix, decode_filename
-from .Code import UtilityCode
+from .Code import UtilityCode, IncludeCode
 from .StringEncoding import EncodedString
 from .Pythran import has_np_pythran

@@ -86,16 +86,15 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):

        self.scope.utility_code_list.extend(scope.utility_code_list)

+        for inc in scope.c_includes.values():
+            self.scope.process_include(inc)
+
        def extend_if_not_in(L1, L2):
            for x in L2:
                if x not in L1:
                    L1.append(x)

-        extend_if_not_in(self.scope.include_files_early, scope.include_files_early)
-        extend_if_not_in(self.scope.include_files_late, scope.include_files_late)
        extend_if_not_in(self.scope.included_files, scope.included_files)
-        extend_if_not_in(self.scope.python_include_files,
-                         scope.python_include_files)

        if merge_scope:
            # Ensure that we don't generate import code for these entries!
@@ -621,8 +620,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
            code.putln("")
        code.putln("#define PY_SSIZE_T_CLEAN")

-        for filename in env.python_include_files:
-            code.putln('#include "%s"' % filename)
+        for inc in sorted(env.c_includes.values(), key=IncludeCode.sortkey):
+            if inc.location == inc.INITIAL:
+                inc.write(code)
        code.putln("#ifndef Py_PYTHON_H")
        code.putln("    #error Python headers needed to compile C extensions, "
                   "please install development version of Python.")
@@ -739,19 +739,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):

    def generate_includes(self, env, cimported_modules, code, early=True, late=True):
        includes = []
+        for inc in sorted(env.c_includes.values(), key=IncludeCode.sortkey):
+            if inc.location == inc.EARLY:
                if early:
-            includes += env.include_files_early
+                    inc.write(code)
+            elif inc.location == inc.LATE:
                if late:
-            includes += [include for include in env.include_files_late
-                         if include not in env.include_files_early]
-        for filename in includes:
-            byte_decoded_filenname = str(filename)
-
-            if byte_decoded_filenname[0] == '<' and byte_decoded_filenname[-1] == '>':
-                code.putln('#include %s' % byte_decoded_filenname)
-            else:
-                code.putln('#include "%s"' % byte_decoded_filenname)
-
+                    inc.write(code)
        if early:
            code.putln_openmp("#include <omp.h>")


--- a/Cython/Compiler/Nodes.py
+++ b/Cython/Compiler/Nodes.py
@@ -471,6 +471,7 @@ class StatNode(Node):

 class CDefExternNode(StatNode):
    #  include_file       string or None
+    #  verbatim_include   string or None
    #  body               StatListNode

    child_attrs = ["body"]
@@ -480,18 +481,16 @@ class CDefExternNode(StatNode):
        env.in_cinclude = 1
        self.body.analyse_declarations(env)
        env.in_cinclude = old_cinclude_flag
-        inc = self.include_file
-        if inc:
+
+        if self.include_file or self.verbatim_include:
+            # Determine whether include should be late
            stats = self.body.stats
-            if inc[0] == '<' and inc[-1] == '>':
-                # System include => always early
-                env.add_include_file(inc)
-            elif stats and all(isinstance(node, CVarDefNode) for node in stats):
-                # Generate a late include if the body is not empty and
-                # all statements are variable or function declarations.
-                env.add_include_file(inc, late=True)
-            else:
-                env.add_include_file(inc)
+            if not stats:
+                # Special case: empty 'cdef extern' blocks are early
+                late = False
+            else:
+                late = all(isinstance(node, CVarDefNode) for node in stats)
+            env.add_include_file(self.include_file, self.verbatim_include, late)

    def analyse_expressions(self, env):
        return self

--- a/Cython/Compiler/Parsing.py
+++ b/Cython/Compiler/Parsing.py
@@ -3081,9 +3081,13 @@ def p_cdef_extern_block(s, pos, ctx):
        ctx.namespace = p_string_literal(s, 'u')[2]
    if p_nogil(s):
        ctx.nogil = 1
-    body = p_suite(s, ctx)
+
+    # Use "docstring" as verbatim string to include
+    verbatim_include, body = p_suite_with_docstring(s, ctx, True)
+
    return Nodes.CDefExternNode(pos,
        include_file = include_file,
+        verbatim_include = verbatim_include,
        body = body,
        namespace = ctx.namespace)


--- a/Cython/Compiler/Symtab.py
+++ b/Cython/Compiler/Symtab.py
@@ -1068,9 +1068,8 @@ class ModuleScope(Scope):
    # doc                  string             Module doc string
    # doc_cname            string             C name of module doc string
    # utility_code_list    [UtilityCode]      Queuing utility codes for forwarding to Code.py
-    # python_include_files [string]           Standard  Python headers to be included
-    # include_files_early  [string]           C headers to be included before Cython decls
-    # include_files_late   [string]           C headers to be included after Cython decls
+    # c_includes           {key: IncludeCode} C headers or verbatim code to be generated
+    #                                         See process_include() for more documentation
    # string_to_entry      {string : Entry}   Map string const to entry
    # identifier_to_entry  {string : Entry}   Map identifier string const to entry
    # context              Context
@@ -1113,9 +1112,7 @@ class ModuleScope(Scope):
        self.doc_cname = Naming.moddoc_cname
        self.utility_code_list = []
        self.module_entries = {}
-        self.python_include_files = ["Python.h"]
-        self.include_files_early = []
-        self.include_files_late = []
+        self.c_includes = {}
        self.type_names = dict(outer_scope.type_names)
        self.pxd_file_loaded = 0
        self.cimported_modules = []
@@ -1129,6 +1126,7 @@ class ModuleScope(Scope):
        for var_name in ['__builtins__', '__name__', '__file__', '__doc__', '__path__',
                         '__spec__', '__loader__', '__package__', '__cached__']:
            self.declare_var(EncodedString(var_name), py_object_type, None)
+        self.process_include(Code.IncludeCode("Python.h", initial=True))

    def qualifying_scope(self):
        return self.parent_module
@@ -1251,24 +1249,50 @@ class ModuleScope(Scope):
            module = module.lookup_submodule(submodule)
        return module

-    def add_include_file(self, filename, late=False):
-        if filename in self.python_include_files:
-            return
-        # Possibly, the same include appears both as early and as late
-        # include. We'll deal with this at code generation time.
-        if late:
-            incs = self.include_files_late
-        else:
-            incs = self.include_files_early
-        if filename not in incs:
-            incs.append(filename)
+    def add_include_file(self, filename, verbatim_include=None, late=False):
+        """
+        Add `filename` as include file. Add `verbatim_include` as
+        verbatim text in the C file.
+        Both `filename` and `verbatim_include` can be `None` or empty.
+        """
+        inc = Code.IncludeCode(filename, verbatim_include, late=late)
+        self.process_include(inc)
+
+    def process_include(self, inc):
+        """
+        Add `inc`, which is an instance of `IncludeCode`, to this
+        `ModuleScope`. This either adds a new element to the
+        `c_includes` dict or it updates an existing entry.
+
+        In detail: the values of the dict `self.c_includes` are
+        instances of `IncludeCode` containing the code to be put in the
+        generated C file. The keys of the dict are needed to ensure
+        uniqueness in two ways: if an include file is specified in
+        multiple "cdef extern" blocks, only one `#include` statement is
+        generated. Second, the same include might occur multiple times
+        if we find it through multiple "cimport" paths. So we use the
+        generated code (of the form `#include "header.h"`) as dict key.
+
+        If verbatim code does not belong to any include file (i.e. it
+        was put in a `cdef extern from *` block), then we use a unique
+        dict key: namely, the `sortkey()`.
+
+        One `IncludeCode` object can contain multiple pieces of C code:
+        one optional "main piece" for the include file and several other
+        pieces for the verbatim code. The `IncludeCode.dict_update`
+        method merges the pieces of two different `IncludeCode` objects
+        if needed.
+        """
+        key = inc.mainpiece()
+        if key is None:
+            key = inc.sortkey()
+        inc.dict_update(self.c_includes, key)
+        inc = self.c_includes[key]

    def add_imported_module(self, scope):
        if scope not in self.cimported_modules:
-            for filename in scope.include_files_early:
-                self.add_include_file(filename, late=False)
-            for filename in scope.include_files_late:
-                self.add_include_file(filename, late=True)
+            for inc in scope.c_includes.values():
+                self.process_include(inc)
            self.cimported_modules.append(scope)
            for m in scope.cimported_modules:
                self.add_imported_module(m)

--- a/docs/src/userguide/external_C_code.rst
+++ b/docs/src/userguide/external_C_code.rst
@@ -328,6 +328,41 @@ are entirely on your own with this feature.  If you want to declare a name
 the C file for it, you can do this using a C name declaration.  Consider this
 an advanced feature, only for the rare cases where everything else fails.

+Including verbatim C code
+-------------------------
+
+For advanced use cases, Cython allows you to directly write C code
+as the "docstring" of a ``cdef extern from`` block::
+
+    cdef extern from *:
+        """
+        /* This is C code which will be put
+         * in the .c file output by Cython */
+        static long square(long x) {return x * x;}
+        #define assign(x, y) ((x) = (y))
+        """
+        long square(long x)
+        void assign(long& x, long y)
+
+The above is essentially equivalent to having the C code in a file
+``header.h`` and writing ::
+
+    cdef extern from "header.h":
+        long square(long x)
+        void assign(long& x, long y)
+
+It is also possible to combine a header file and verbatim C code::
+
+    cdef extern from "badheader.h":
+        """
+        /* This macro breaks stuff */
+        #undef int
+        """
+        # Stuff from badheader.h
+
+In this case, the C code ``#undef int`` is put right after
+``#include "badheader.h"`` in the C code generated by Cython.
+

 Using Cython Declarations from C
 ================================

--- a/tests/compile/cnamespec.h
+++ b/tests/compile/cnamespec.h
-int c_a, c_b;
--- a/tests/compile/cnamespec.pyx
+++ b/tests/compile/cnamespec.pyx
 # mode: compile

-cdef extern from "cnamespec.h":
+cdef extern from *:
+    """
+    int c_a, c_b;
+    """
    int a "c_a", b "c_b"

 cdef struct foo "c_foo":

--- a/tests/compile/verbatiminclude_cimport.srctree
+++ b/tests/compile/verbatiminclude_cimport.srctree
+PYTHON setup.py build_ext --inplace
+
+######## setup.py ########
+
+from Cython.Build import cythonize
+from distutils.core import setup
+
+setup(
+    ext_modules = cythonize("*.pyx"),
+)
+
+######## test.pyx ########
+
+from moda cimport DEFINE_A
+from modb cimport DEFINE_B
+
+######## moda.pxd ########
+
+from verbatim cimport DEFINE_ONCE as DEFINE_A
+
+######## modb.pxd ########
+
+from verbatim cimport DEFINE_ONCE as DEFINE_B
+
+######## verbatim.pxd ########
+
+# Check that we include this only once
+cdef extern from *:
+    """
+    #ifdef DEFINE_ONCE
+    #error "DEFINE_ONCE already defined"
+    #endif
+
+    #define DEFINE_ONCE 1
+    """
+    int DEFINE_ONCE
--- a/tests/memoryview/memoryview.pyx
+++ b/tests/memoryview/memoryview.pyx
@@ -1039,3 +1039,47 @@ def min_max_tree_restructuring():
    cdef char[:] aview = a

    return max(<char>1, aview[0]), min(<char>5, aview[2])
+
+
+@cython.test_fail_if_path_exists(
+    '//MemoryViewSliceNode',
+)
+@cython.test_assert_path_exists(
+    '//MemoryViewIndexNode',
+)
+#@cython.boundscheck(False)  # reduce C code clutter
+def optimised_index_of_slice(int[:,:,:] arr, int x, int y, int z):
+    """
+    >>> arr = IntMockBuffer("A", list(range(10*10*10)), shape=(10,10,10))
+    >>> optimised_index_of_slice(arr, 2, 3, 4)
+    acquired A
+    (123, 123)
+    (223, 223)
+    (133, 133)
+    (124, 124)
+    (234, 234)
+    (123, 123)
+    (123, 123)
+    (123, 123)
+    (134, 134)
+    (134, 134)
+    (234, 234)
+    (234, 234)
+    (234, 234)
+    released A
+    """
+    print(arr[1, 2, 3], arr[1][2][3])
+    print(arr[x, 2, 3], arr[x][2][3])
+    print(arr[1, y, 3], arr[1][y][3])
+    print(arr[1, 2, z], arr[1][2][z])
+    print(arr[x, y, z], arr[x][y][z])
+
+    print(arr[1, 2, 3], arr[:, 2][1][3])
+    print(arr[1, 2, 3], arr[:, 2, :][1, 3])
+    print(arr[1, 2, 3], arr[:, 2, 3][1])
+    print(arr[1, y, z], arr[1, :][y][z])
+    print(arr[1, y, z], arr[1, :][y, z])
+
+    print(arr[x, y, z], arr[x][:][:][y][:][:][z])
+    print(arr[x, y, z], arr[:][x][:][y][:][:][z])
+    print(arr[x, y, z], arr[:, :][x][:, :][y][:][z])
--- a/tests/run/for_from_pyvar_loop_T601.pyx
+++ b/tests/run/for_from_pyvar_loop_T601.pyx
@@ -26,7 +26,8 @@ def for_in_plain_ulong():
        print j


-cdef extern from "for_from_pyvar_loop_T601_extern_def.h":
+cdef extern from *:
+    """typedef unsigned long Ulong;"""
    ctypedef unsigned long Ulong

 cdef Ulong size():

--- a/tests/run/for_from_pyvar_loop_T601_extern_def.h
+++ b/tests/run/for_from_pyvar_loop_T601_extern_def.h
-
-typedef unsigned long Ulong;
--- a/tests/run/verbatiminclude.h
+++ b/tests/run/verbatiminclude.h
+static long cube(long x)
+{
+    return x * x * x;
+}
+
+#define long broken_long
--- a/tests/run/verbatiminclude.pyx
+++ b/tests/run/verbatiminclude.pyx
+cdef extern from "verbatiminclude.h":
+    long cube(long)
+
+cdef extern from *:
+    """
+    static long square(long x)
+    {
+        return x * x;
+    }
+    """
+    long square(long)
+
+
+cdef extern from "verbatiminclude.h":
+    "typedef int myint;"
+    ctypedef int myint
+
+cdef extern from "verbatiminclude.h":
+    "#undef long"
+
+
+cdef class C:
+    cdef myint val
+
+
+cdef extern from "Python.h":
+    """
+    #define Py_SET_SIZE(obj, size)  Py_SIZE((obj)) = (size)
+    """
+    void Py_SET_SIZE(object, Py_ssize_t)
+
+
+def test_square(x):
+    """
+    >>> test_square(4)
+    16
+    """
+    return square(x)
+
+
+def test_cube(x):
+    """
+    >>> test_cube(4)
+    64
+    """
+    return cube(x)
+
+
+def test_class():
+    """
+    >>> test_class()
+    42
+    """
+    cdef C x = C()
+    x.val = 42
+    return x.val
+
+
+def test_set_size(x, size):
+    # This function manipulates Python objects in a bad way, so we
+    # do not call it. The real test is that it compiles.
+    Py_SET_SIZE(x, size)