Split UnicodeContains helper to reduce inlined code.

e9376a99 · Stefan Behnel · c6a6b3c9 · e9376a99
Commit e9376a99 authored Sep 12, 2017 by Stefan Behnel
Show whitespace changes
Inline Side-by-side

Showing with 32 additions and 25 deletions

Cython/Utility/StringTools.c Cython/Utility/StringTools.c +32 -25

No files found.
--- a/Cython/Utility/StringTools.c
+++ b/Cython/Utility/StringTools.c
@@ -63,10 +63,31 @@ static CYTHON_INLINE int __Pyx_BytesContains(PyObject* bytes, char character) {
 //////////////////// PyUCS4InUnicode.proto ////////////////////

 static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character); /*proto*/
-static CYTHON_INLINE int __Pyx_PyUnicodeBufferContainsUCS4(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character); /*proto*/

 //////////////////// PyUCS4InUnicode ////////////////////

+static int __Pyx_PyUnicodeBufferContainsUCS4_SP(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character) {
+    /* Surrogate-pair search for Py_UNICODE buffers in 16bit Unicode builds: returns 1 if 'character', encoded as a high/low surrogate pair, occurs at two adjacent positions in 'buffer', 0 otherwise. __Pyx_UnicodeContainsUCS4 only calls this when character > 65535. */
+    Py_UNICODE high_val, low_val;
+    Py_UNICODE* pos;
+    high_val = (Py_UNICODE) (0xD800 | (((character - 0x10000) >> 10) & ((1<<10)-1)));  /* high surrogate: 0xD800 | upper 10 bits of (character - 0x10000) */
+    low_val  = (Py_UNICODE) (0xDC00 | ( (character - 0x10000)        & ((1<<10)-1)));  /* low surrogate: 0xDC00 | lower 10 bits of (character - 0x10000) */
+    for (pos=buffer; pos < buffer+length-1; pos++) {  /* stops one element before the end because pos[1] is read below */
+        if (unlikely((high_val == pos[0]) & (low_val == pos[1]))) return 1;  /* bitwise '&' evaluates both comparisons (no short-circuit); match is hinted as the rare case */
+    }
+    return 0;  /* no adjacent high/low pair matched */
+}
+
+static int __Pyx_PyUnicodeBufferContainsUCS4_BMP(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character) {  /* returns 1 if any of the 'length' elements of 'buffer' equals 'character' converted to Py_UNICODE, 0 otherwise */
+    Py_UNICODE uchar;
+    Py_UNICODE* pos;
+    uchar = (Py_UNICODE) character;  /* convert once to the buffer's element type, outside the loop */
+    for (pos=buffer; pos < buffer+length; pos++) {  /* linear scan over all elements */
+        if (unlikely(uchar == pos[0])) return 1;  /* found; a match is hinted as the rare case */
+    }
+    return 0;  /* scanned the whole buffer without a match */
+}
+
 static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character) {
 #if CYTHON_PEP393_ENABLED
    const int kind = PyUnicode_KIND(unicode);
@@ -80,32 +101,18 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch
        return 0;
    }
 #endif
-    return __Pyx_PyUnicodeBufferContainsUCS4(
+    if (Py_UNICODE_SIZE == 2 && unlikely(character > 65535)) {
+        return __Pyx_PyUnicodeBufferContainsUCS4_SP(
+            PyUnicode_AS_UNICODE(unicode),
+            PyUnicode_GET_SIZE(unicode),
+            character);
+    } else {
+        return __Pyx_PyUnicodeBufferContainsUCS4_BMP(
            PyUnicode_AS_UNICODE(unicode),
            PyUnicode_GET_SIZE(unicode),
            character);
-}

-static CYTHON_INLINE int __Pyx_PyUnicodeBufferContainsUCS4(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character) {
-    Py_UNICODE uchar;
-    Py_UNICODE* pos;
-    #if Py_UNICODE_SIZE == 2
-    if (character > 65535) {
-        /* handle surrogate pairs for Py_UNICODE buffers in 16bit Unicode builds */
-        Py_UNICODE high_val, low_val;
-        high_val = (Py_UNICODE) (0xD800 | (((character - 0x10000) >> 10) & ((1<<10)-1)));
-        low_val  = (Py_UNICODE) (0xDC00 | ( (character - 0x10000)        & ((1<<10)-1)));
-        for (pos=buffer; pos < buffer+length-1; pos++) {
-            if (unlikely(high_val == pos[0]) & unlikely(low_val == pos[1])) return 1;
-        }
-        return 0;
    }
-    #endif
-    uchar = (Py_UNICODE) character;
-    for (pos=buffer; pos < buffer+length; pos++) {
-        if (unlikely(uchar == pos[0])) return 1;
-    }
-    return 0;
 }