Make "__Pyx_UnicodeContainsUCS4()" work for WCHAR unicode strings with Py3.9+. (GH-4135)

* Use the same fallback as for missing PEP-393 support. * Prepare for "PyUnicode_READY()" and "PyUnicode_WCHAR_KIND" to be removed in Py3.12. See https://www.python.org/dev/peps/pep-0623/ * Avoid C compiler warnings about deprecated C-API functions in Py3.9+. Closes https://github.com/cython/cython/issues/3925

Make "__Pyx_UnicodeContainsUCS4()" work for WCHAR unicode strings with Py3.9+. (GH-4135)
* Use the same fallback as for missing PEP-393 support. * Prepare for "PyUnicode_READY()" and "PyUnicode_WCHAR_KIND" to be removed in Py3.12. See https://www.python.org/dev/peps/pep-0623/ * Avoid C compiler warnings about deprecated C-API functions in Py3.9+. Closes https://github.com/cython/cython/issues/3925
a89a8812 · scoder · Stefan Behnel · 7a4e43da · a89a8812 · a89a8812
Commit a89a8812 authored Apr 25, 2021 by scoder Committed by Stefan Behnel Apr 25, 2021
Show whitespace changes
Inline Side-by-side

Showing with 39 additions and 9 deletions

Cython/Utility/ModuleSetupCode.c Cython/Utility/ModuleSetupCode.c +13 -0

Cython/Utility/StringTools.c Cython/Utility/StringTools.c +26 -9

No files found.
--- a/Cython/Utility/ModuleSetupCode.c
+++ b/Cython/Utility/ModuleSetupCode.c
@@ -523,8 +523,15 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
 /* new Py3.3 unicode type (PEP 393) */
 #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
  #define CYTHON_PEP393_ENABLED 1
+  #if defined(PyUnicode_IS_READY)
  #define __Pyx_PyUnicode_READY(op)       (likely(PyUnicode_IS_READY(op)) ? \
                                              0 : _PyUnicode_Ready((PyObject *)(op)))
+  #else
+  // Py3.12 / PEP-623 will remove wstr type unicode strings and all of the PyUnicode_READY() machinery.
+  #define __Pyx_PyUnicode_READY(op)       (0)
+  #endif
  #define __Pyx_PyUnicode_GET_LENGTH(u)   PyUnicode_GET_LENGTH(u)
  #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
  #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u)   PyUnicode_MAX_CHAR_VALUE(u)
@@ -533,7 +540,13 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
  #define __Pyx_PyUnicode_READ(k, d, i)   PyUnicode_READ(k, d, i)
  #define __Pyx_PyUnicode_WRITE(k, d, i, ch)  PyUnicode_WRITE(k, d, i, ch)
  #if defined(PyUnicode_IS_READY) && defined(PyUnicode_GET_SIZE)
+  #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000
+  // Avoid calling deprecated C-API functions in Py3.9+ that PEP-623 schedules for removal in Py3.12.
+  // https://www.python.org/dev/peps/pep-0623/
+  #define __Pyx_PyUnicode_IS_TRUE(u)      (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length))
+  #else
  #define __Pyx_PyUnicode_IS_TRUE(u)      (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
+  #endif
  #else
  #define __Pyx_PyUnicode_IS_TRUE(u)      (0 != PyUnicode_GET_LENGTH(u))
  #endif

--- a/Cython/Utility/StringTools.c
+++ b/Cython/Utility/StringTools.c
@@ -66,8 +66,19 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch
 //////////////////// PyUCS4InUnicode ////////////////////
+#if PY_VERSION_HEX < 0x03090000 || (defined(PyUnicode_WCHAR_KIND) && defined(PyUnicode_AS_UNICODE))
 #if PY_VERSION_HEX < 0x03090000
-#if Py_UNICODE_SIZE == 2
+#define __Pyx_PyUnicode_AS_UNICODE(op) PyUnicode_AS_UNICODE(op)
+#define __Pyx_PyUnicode_GET_SIZE(op) PyUnicode_GET_SIZE(op)
+#else
+// Avoid calling deprecated C-API functions in Py3.9+ that PEP-623 schedules for removal in Py3.12.
+// https://www.python.org/dev/peps/pep-0623/
+#define __Pyx_PyUnicode_AS_UNICODE(op) (((PyASCIIObject *)(op))->wstr)
+#define __Pyx_PyUnicode_GET_SIZE(op) ((PyCompactUnicodeObject *)(op))->wstr_length
+#endif
+#if !defined(Py_UNICODE_SIZE) || Py_UNICODE_SIZE == 2
 static int __Pyx_PyUnicodeBufferContainsUCS4_SP(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character) {
    /* handle surrogate pairs for Py_UNICODE buffers in 16bit Unicode builds */
    Py_UNICODE high_val, low_val;
@@ -95,7 +106,10 @@ static int __Pyx_PyUnicodeBufferContainsUCS4_BMP(Py_UNICODE* buffer, Py_ssize_t
 static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character) {
 #if CYTHON_PEP393_ENABLED
    const int kind = PyUnicode_KIND(unicode);
-    if (likely(kind != PyUnicode_WCHAR_KIND)) {
+    #ifdef PyUnicode_WCHAR_KIND
+    if (likely(kind != PyUnicode_WCHAR_KIND))
+    #endif
+    {
        Py_ssize_t i;
        const void* udata = PyUnicode_DATA(unicode);
        const Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
@@ -106,20 +120,23 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch
    }
 #elif PY_VERSION_HEX >= 0x03090000
    #error Cannot use "UChar in Unicode" in Python 3.9 without PEP-393 unicode strings.
+#elif !defined(PyUnicode_AS_UNICODE)
+    #error Cannot use "UChar in Unicode" in Python < 3.9 without Py_UNICODE support.
 #endif
-#if PY_VERSION_HEX < 0x03090000
-#if Py_UNICODE_SIZE == 2
+#if PY_VERSION_HEX < 0x03090000 || (defined(PyUnicode_WCHAR_KIND) && defined(PyUnicode_AS_UNICODE))
-    if (unlikely(character > 65535)) {
+#if !defined(Py_UNICODE_SIZE) || Py_UNICODE_SIZE == 2
+    if ((sizeof(Py_UNICODE) == 2) && unlikely(character > 65535)) {
        return __Pyx_PyUnicodeBufferContainsUCS4_SP(
-            PyUnicode_AS_UNICODE(unicode),
+            __Pyx_PyUnicode_AS_UNICODE(unicode),
-            PyUnicode_GET_SIZE(unicode),
+            __Pyx_PyUnicode_GET_SIZE(unicode),
            character);
    } else
 #endif
    {
        return __Pyx_PyUnicodeBufferContainsUCS4_BMP(
-            PyUnicode_AS_UNICODE(unicode),
+            __Pyx_PyUnicode_AS_UNICODE(unicode),
-            PyUnicode_GET_SIZE(unicode),
+            __Pyx_PyUnicode_GET_SIZE(unicode),
            character);
    }