move string related utility functions from Optimize.c and Optimize.py to StringTools.c

a46b2a59 · Stefan Behnel · 04160be8 · a46b2a59 · a46b2a59 · a46b2a59
Commit a46b2a59 authored Feb 23, 2013 by Stefan Behnel
Showing with 189 additions and 186 deletions

Cython/Compiler/Optimize.py Cython/Compiler/Optimize.py +9 -29

Cython/Utility/Optimize.c Cython/Utility/Optimize.c +0 -157

Cython/Utility/StringTools.c Cython/Utility/StringTools.c +180 -0

No files found.
--- a/Cython/Compiler/Optimize.py
+++ b/Cython/Compiler/Optimize.py
@@ -1788,13 +1788,14 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
                node = ExprNodes.PythonCapiCallNode(
                    coerce_node.pos, "__Pyx_PyBytes_GetItemInt",
                    self.PyBytes_GetItemInt_func_type,
-                    args = [
+                    args=[
                        arg.base.as_none_safe_node("'NoneType' object is not subscriptable"),
                        index_node.coerce_to(PyrexTypes.c_py_ssize_t_type, env),
                        bound_check_node,
                        ],
-                    is_temp = True,
-                    utility_code=load_c_utility('bytes_index'))
+                    is_temp=True,
+                    utility_code=UtilityCode.load_cached(
+                        'bytes_index', 'StringTools.c'))
                if coerce_node.type is not PyrexTypes.c_char_type:
                    node = node.coerce_to(coerce_node.type, env)
                return node
@@ -2342,7 +2343,8 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
        method_name = node.function.attribute
        if method_name == 'istitle':
            # istitle() doesn't directly map to Py_UNICODE_ISTITLE()
-            utility_code = load_c_utility("py_unicode_istitle")
+            utility_code = UtilityCode.load_cached(
+                "py_unicode_istitle", "StringTools.c")
            function_name = '__Pyx_Py_UNICODE_ISTITLE'
        else:
            utility_code = None
@@ -2898,31 +2900,9 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
            args[arg_index] = args[arg_index].coerce_to_boolean(self.current_env())


-unicode_tailmatch_utility_code = load_c_utility('unicode_tailmatch')
-
-bytes_tailmatch_utility_code = load_c_utility('bytes_tailmatch')
-
-str_tailmatch_utility_code = UtilityCode(
-proto = '''
-static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
-                                               Py_ssize_t end, int direction);
-''',
-# We do not use a C compiler macro here to avoid "unused function"
-# warnings for the *_Tailmatch() function that is not being used in
-# the specific CPython version.  The C compiler will generate the same
-# code anyway, and will usually just remove the unused function.
-impl = '''
-static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
-                                               Py_ssize_t end, int direction)
-{
-    if (PY_MAJOR_VERSION < 3)
-        return __Pyx_PyBytes_Tailmatch(self, arg, start, end, direction);
-    else
-        return __Pyx_PyUnicode_Tailmatch(self, arg, start, end, direction);
-}
-''',
-requires=[unicode_tailmatch_utility_code, bytes_tailmatch_utility_code]
-)
+unicode_tailmatch_utility_code = UtilityCode.load_cached('unicode_tailmatch', 'StringTools.c')
+bytes_tailmatch_utility_code = UtilityCode.load_cached('bytes_tailmatch', 'StringTools.c')
+str_tailmatch_utility_code = UtilityCode.load_cached('str_tailmatch', 'StringTools.c')


 tpnew_utility_code = UtilityCode(

--- a/Cython/Utility/Optimize.c
+++ b/Cython/Utility/Optimize.c
@@ -112,163 +112,6 @@ static PyObject* __Pyx_PyObject_PopIndex(PyObject* L, Py_ssize_t ix) {
 }


-/////////////// py_unicode_istitle.proto ///////////////
-
-// Py_UNICODE_ISTITLE() doesn't match unicode.istitle() as the latter
-// additionally allows character that comply with Py_UNICODE_ISUPPER()
-
-#if PY_VERSION_HEX < 0x030200A2
-static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UNICODE uchar)
-#else
-static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UCS4 uchar)
-#endif
-{
-    return Py_UNICODE_ISTITLE(uchar) || Py_UNICODE_ISUPPER(uchar);
-}
-
-
-/////////////// unicode_tailmatch.proto ///////////////
-
-// Python's unicode.startswith() and unicode.endswith() support a
-// tuple of prefixes/suffixes, whereas it's much more common to
-// test for a single unicode string.
-
-static int __Pyx_PyUnicode_Tailmatch(PyObject* s, PyObject* substr,
-                                     Py_ssize_t start, Py_ssize_t end, int direction) {
-    if (unlikely(PyTuple_Check(substr))) {
-        Py_ssize_t i, count = PyTuple_GET_SIZE(substr);
-        for (i = 0; i < count; i++) {
-            int result;
-#if CYTHON_COMPILING_IN_CPYTHON
-            result = PyUnicode_Tailmatch(s, PyTuple_GET_ITEM(substr, i),
-                                         start, end, direction);
-#else
-            PyObject* sub = PySequence_GetItem(substr, i);
-            if (unlikely(!sub)) return -1;
-            result = PyUnicode_Tailmatch(s, sub, start, end, direction);
-            Py_DECREF(sub);
-#endif
-            if (result) {
-                return result;
-            }
-        }
-        return 0;
-    }
-    return PyUnicode_Tailmatch(s, substr, start, end, direction);
-}
-
-
-/////////////// bytes_tailmatch.proto ///////////////
-
-static int __Pyx_PyBytes_SingleTailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
-                                         Py_ssize_t end, int direction)
-{
-    const char* self_ptr = PyBytes_AS_STRING(self);
-    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
-    const char* sub_ptr;
-    Py_ssize_t sub_len;
-    int retval;
-    
-#if PY_VERSION_HEX >= 0x02060000
-    Py_buffer view;
-    view.obj = NULL;
-#endif
-    
-    if ( PyBytes_Check(arg) ) {
-        sub_ptr = PyBytes_AS_STRING(arg);
-        sub_len = PyBytes_GET_SIZE(arg);
-    }
-#if PY_MAJOR_VERSION < 3
-    // Python 2.x allows mixing unicode and str
-    else if ( PyUnicode_Check(arg) ) {
-        return PyUnicode_Tailmatch(self, arg, start, end, direction);
-    }
-#endif
-    else {
-#if PY_VERSION_HEX < 0x02060000
-        if (unlikely(PyObject_AsCharBuffer(arg, &sub_ptr, &sub_len)))
-            return -1;
-#else
-        if (unlikely(PyObject_GetBuffer(self, &view, PyBUF_SIMPLE) == -1))
-            return -1;
-        sub_ptr = (const char*) view.buf;
-        sub_len = view.len;
-#endif
-    }
-    
-    if (end > self_len)
-        end = self_len;
-    else if (end < 0)
-        end += self_len;
-    if (end < 0)
-        end = 0;
-    if (start < 0)
-        start += self_len;
-    if (start < 0)
-        start = 0;
-    
-    if (direction > 0) {
-        /* endswith */
-        if (end-sub_len > start)
-            start = end - sub_len;
-    }
-    
-    if (start + sub_len <= end)
-        retval = !memcmp(self_ptr+start, sub_ptr, sub_len);
-    else
-        retval = 0;
-    
-#if PY_VERSION_HEX >= 0x02060000
-    if (view.obj)
-        PyBuffer_Release(&view);
-#endif
-    
-    return retval;
-}
-
-static int __Pyx_PyBytes_Tailmatch(PyObject* self, PyObject* substr, Py_ssize_t start,
-                                   Py_ssize_t end, int direction)
-{
-    if (unlikely(PyTuple_Check(substr))) {
-        Py_ssize_t i, count = PyTuple_GET_SIZE(substr);
-        for (i = 0; i < count; i++) {
-            int result;
-#if CYTHON_COMPILING_IN_CPYTHON
-            result = __Pyx_PyBytes_SingleTailmatch(self, PyTuple_GET_ITEM(substr, i),
-                                                   start, end, direction);
-#else
-            PyObject* sub = PySequence_GetItem(substr, i);
-            if (unlikely(!sub)) return -1;
-            result = __Pyx_PyBytes_SingleTailmatch(self, sub, start, end, direction);
-            Py_DECREF(sub);
-#endif
-            if (result) {
-                return result;
-            }
-        }
-        return 0;
-    }
-    
-    return __Pyx_PyBytes_SingleTailmatch(self, substr, start, end, direction);
-}
-
-
-/////////////// bytes_index.proto ///////////////
-
-static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t index, int check_bounds) {
-    if (check_bounds) {
-        Py_ssize_t size = PyBytes_GET_SIZE(bytes);
-        if (unlikely(index >= size) | ((index < 0) & unlikely(index < -size))) {
-            PyErr_Format(PyExc_IndexError, "string index out of range");
-            return -1;
-        }
-    }
-    if (index < 0)
-        index += PyBytes_GET_SIZE(bytes);
-    return PyBytes_AS_STRING(bytes)[index];
-}
-
-
 /////////////// dict_getitem_default.proto ///////////////

 static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value); /*proto*/

--- a/Cython/Utility/StringTools.c
+++ b/Cython/Utility/StringTools.c
@@ -418,3 +418,183 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
    return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
 #endif
 }
+
+
+/////////////// py_unicode_istitle.proto ///////////////
+
+// Py_UNICODE_ISTITLE() doesn't match unicode.istitle() as the latter
+// additionally allows character that comply with Py_UNICODE_ISUPPER()
+
+#if PY_VERSION_HEX < 0x030200A2
+static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UNICODE uchar)
+#else
+static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UCS4 uchar)
+#endif
+{
+    return Py_UNICODE_ISTITLE(uchar) || Py_UNICODE_ISUPPER(uchar);
+}
+
+
+/////////////// unicode_tailmatch.proto ///////////////
+
+// Python's unicode.startswith() and unicode.endswith() support a
+// tuple of prefixes/suffixes, whereas it's much more common to
+// test for a single unicode string.
+
+static int __Pyx_PyUnicode_Tailmatch(PyObject* s, PyObject* substr,
+                                     Py_ssize_t start, Py_ssize_t end, int direction) {
+    if (unlikely(PyTuple_Check(substr))) {
+        Py_ssize_t i, count = PyTuple_GET_SIZE(substr);
+        for (i = 0; i < count; i++) {
+            int result;
+#if CYTHON_COMPILING_IN_CPYTHON
+            result = PyUnicode_Tailmatch(s, PyTuple_GET_ITEM(substr, i),
+                                         start, end, direction);
+#else
+            PyObject* sub = PySequence_GetItem(substr, i);
+            if (unlikely(!sub)) return -1;
+            result = PyUnicode_Tailmatch(s, sub, start, end, direction);
+            Py_DECREF(sub);
+#endif
+            if (result) {
+                return result;
+            }
+        }
+        return 0;
+    }
+    return PyUnicode_Tailmatch(s, substr, start, end, direction);
+}
+
+
+/////////////// bytes_tailmatch.proto ///////////////
+
+static int __Pyx_PyBytes_SingleTailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
+                                         Py_ssize_t end, int direction)
+{
+    const char* self_ptr = PyBytes_AS_STRING(self);
+    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
+    const char* sub_ptr;
+    Py_ssize_t sub_len;
+    int retval;
+
+#if PY_VERSION_HEX >= 0x02060000
+    Py_buffer view;
+    view.obj = NULL;
+#endif
+
+    if ( PyBytes_Check(arg) ) {
+        sub_ptr = PyBytes_AS_STRING(arg);
+        sub_len = PyBytes_GET_SIZE(arg);
+    }
+#if PY_MAJOR_VERSION < 3
+    // Python 2.x allows mixing unicode and str
+    else if ( PyUnicode_Check(arg) ) {
+        return PyUnicode_Tailmatch(self, arg, start, end, direction);
+    }
+#endif
+    else {
+#if PY_VERSION_HEX < 0x02060000
+        if (unlikely(PyObject_AsCharBuffer(arg, &sub_ptr, &sub_len)))
+            return -1;
+#else
+        if (unlikely(PyObject_GetBuffer(self, &view, PyBUF_SIMPLE) == -1))
+            return -1;
+        sub_ptr = (const char*) view.buf;
+        sub_len = view.len;
+#endif
+    }
+
+    if (end > self_len)
+        end = self_len;
+    else if (end < 0)
+        end += self_len;
+    if (end < 0)
+        end = 0;
+    if (start < 0)
+        start += self_len;
+    if (start < 0)
+        start = 0;
+
+    if (direction > 0) {
+        /* endswith */
+        if (end-sub_len > start)
+            start = end - sub_len;
+    }
+
+    if (start + sub_len <= end)
+        retval = !memcmp(self_ptr+start, sub_ptr, sub_len);
+    else
+        retval = 0;
+
+#if PY_VERSION_HEX >= 0x02060000
+    if (view.obj)
+        PyBuffer_Release(&view);
+#endif
+
+    return retval;
+}
+
+static int __Pyx_PyBytes_Tailmatch(PyObject* self, PyObject* substr, Py_ssize_t start,
+                                   Py_ssize_t end, int direction)
+{
+    if (unlikely(PyTuple_Check(substr))) {
+        Py_ssize_t i, count = PyTuple_GET_SIZE(substr);
+        for (i = 0; i < count; i++) {
+            int result;
+#if CYTHON_COMPILING_IN_CPYTHON
+            result = __Pyx_PyBytes_SingleTailmatch(self, PyTuple_GET_ITEM(substr, i),
+                                                   start, end, direction);
+#else
+            PyObject* sub = PySequence_GetItem(substr, i);
+            if (unlikely(!sub)) return -1;
+            result = __Pyx_PyBytes_SingleTailmatch(self, sub, start, end, direction);
+            Py_DECREF(sub);
+#endif
+            if (result) {
+                return result;
+            }
+        }
+        return 0;
+    }
+
+    return __Pyx_PyBytes_SingleTailmatch(self, substr, start, end, direction);
+}
+
+
+/////////////// str_tailmatch.proto ///////////////
+
+static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
+                                               Py_ssize_t end, int direction);
+
+/////////////// str_tailmatch ///////////////
+//@requires: bytes_tailmatch
+//@requires: unicode_tailmatch
+
+static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
+                                               Py_ssize_t end, int direction)
+{
+    // We do not use a C compiler macro here to avoid "unused function"
+    // warnings for the *_Tailmatch() function that is not being used in
+    // the specific CPython version.  The C compiler will generate the same
+    // code anyway, and will usually just remove the unused function.
+    if (PY_MAJOR_VERSION < 3)
+        return __Pyx_PyBytes_Tailmatch(self, arg, start, end, direction);
+    else
+        return __Pyx_PyUnicode_Tailmatch(self, arg, start, end, direction);
+}
+
+
+/////////////// bytes_index.proto ///////////////
+
+static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t index, int check_bounds) {
+    if (check_bounds) {
+        Py_ssize_t size = PyBytes_GET_SIZE(bytes);
+        if (unlikely(index >= size) | ((index < 0) & unlikely(index < -size))) {
+            PyErr_Format(PyExc_IndexError, "string index out of range");
+            return -1;
+        }
+    }
+    if (index < 0)
+        index += PyBytes_GET_SIZE(bytes);
+    return PyBytes_AS_STRING(bytes)[index];
+}