Commit a89a8812 authored by scoder, committed by Stefan Behnel

Make "__Pyx_UnicodeContainsUCS4()" work for WCHAR unicode strings with Py3.9+. (GH-4135)

* Use the same fallback as for missing PEP-393 support.
* Prepare for "PyUnicode_READY()" and "PyUnicode_WCHAR_KIND" to be removed in Py3.12.
  See https://www.python.org/dev/peps/pep-0623/
* Avoid C compiler warnings about deprecated C-API functions in Py3.9+.

Closes https://github.com/cython/cython/issues/3925
parent 7a4e43da
...@@ -523,8 +523,15 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { ...@@ -523,8 +523,15 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
/* new Py3.3 unicode type (PEP 393) */ /* new Py3.3 unicode type (PEP 393) */
#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
#define CYTHON_PEP393_ENABLED 1 #define CYTHON_PEP393_ENABLED 1
#if defined(PyUnicode_IS_READY)
#define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? \ #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? \
0 : _PyUnicode_Ready((PyObject *)(op))) 0 : _PyUnicode_Ready((PyObject *)(op)))
#else
// Py3.12 / PEP-623 will remove wstr type unicode strings and all of the PyUnicode_READY() machinery.
#define __Pyx_PyUnicode_READY(op) (0)
#endif
#define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
#define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
#define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u)
...@@ -533,7 +540,13 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { ...@@ -533,7 +540,13 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
#define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
#define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch) #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch)
#if defined(PyUnicode_IS_READY) && defined(PyUnicode_GET_SIZE) #if defined(PyUnicode_IS_READY) && defined(PyUnicode_GET_SIZE)
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000
// Avoid calling deprecated C-API functions in Py3.9+ that PEP-623 schedules for removal in Py3.12.
// https://www.python.org/dev/peps/pep-0623/
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length))
#else
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
#endif
#else #else
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u))
#endif #endif
......
...@@ -66,8 +66,19 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch ...@@ -66,8 +66,19 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch
//////////////////// PyUCS4InUnicode //////////////////// //////////////////// PyUCS4InUnicode ////////////////////
#if PY_VERSION_HEX < 0x03090000 || (defined(PyUnicode_WCHAR_KIND) && defined(PyUnicode_AS_UNICODE))
#if PY_VERSION_HEX < 0x03090000 #if PY_VERSION_HEX < 0x03090000
#if Py_UNICODE_SIZE == 2 #define __Pyx_PyUnicode_AS_UNICODE(op) PyUnicode_AS_UNICODE(op)
#define __Pyx_PyUnicode_GET_SIZE(op) PyUnicode_GET_SIZE(op)
#else
// Avoid calling deprecated C-API functions in Py3.9+ that PEP-623 schedules for removal in Py3.12.
// https://www.python.org/dev/peps/pep-0623/
#define __Pyx_PyUnicode_AS_UNICODE(op) (((PyASCIIObject *)(op))->wstr)
#define __Pyx_PyUnicode_GET_SIZE(op) ((PyCompactUnicodeObject *)(op))->wstr_length
#endif
#if !defined(Py_UNICODE_SIZE) || Py_UNICODE_SIZE == 2
static int __Pyx_PyUnicodeBufferContainsUCS4_SP(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character) { static int __Pyx_PyUnicodeBufferContainsUCS4_SP(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character) {
/* handle surrogate pairs for Py_UNICODE buffers in 16bit Unicode builds */ /* handle surrogate pairs for Py_UNICODE buffers in 16bit Unicode builds */
Py_UNICODE high_val, low_val; Py_UNICODE high_val, low_val;
...@@ -95,7 +106,10 @@ static int __Pyx_PyUnicodeBufferContainsUCS4_BMP(Py_UNICODE* buffer, Py_ssize_t ...@@ -95,7 +106,10 @@ static int __Pyx_PyUnicodeBufferContainsUCS4_BMP(Py_UNICODE* buffer, Py_ssize_t
static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character) { static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character) {
#if CYTHON_PEP393_ENABLED #if CYTHON_PEP393_ENABLED
const int kind = PyUnicode_KIND(unicode); const int kind = PyUnicode_KIND(unicode);
if (likely(kind != PyUnicode_WCHAR_KIND)) { #ifdef PyUnicode_WCHAR_KIND
if (likely(kind != PyUnicode_WCHAR_KIND))
#endif
{
Py_ssize_t i; Py_ssize_t i;
const void* udata = PyUnicode_DATA(unicode); const void* udata = PyUnicode_DATA(unicode);
const Py_ssize_t length = PyUnicode_GET_LENGTH(unicode); const Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
...@@ -106,20 +120,23 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch ...@@ -106,20 +120,23 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch
} }
#elif PY_VERSION_HEX >= 0x03090000 #elif PY_VERSION_HEX >= 0x03090000
#error Cannot use "UChar in Unicode" in Python 3.9 without PEP-393 unicode strings. #error Cannot use "UChar in Unicode" in Python 3.9 without PEP-393 unicode strings.
#elif !defined(PyUnicode_AS_UNICODE)
#error Cannot use "UChar in Unicode" in Python < 3.9 without Py_UNICODE support.
#endif #endif
#if PY_VERSION_HEX < 0x03090000
#if Py_UNICODE_SIZE == 2 #if PY_VERSION_HEX < 0x03090000 || (defined(PyUnicode_WCHAR_KIND) && defined(PyUnicode_AS_UNICODE))
if (unlikely(character > 65535)) { #if !defined(Py_UNICODE_SIZE) || Py_UNICODE_SIZE == 2
if ((sizeof(Py_UNICODE) == 2) && unlikely(character > 65535)) {
return __Pyx_PyUnicodeBufferContainsUCS4_SP( return __Pyx_PyUnicodeBufferContainsUCS4_SP(
PyUnicode_AS_UNICODE(unicode), __Pyx_PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode), __Pyx_PyUnicode_GET_SIZE(unicode),
character); character);
} else } else
#endif #endif
{ {
return __Pyx_PyUnicodeBufferContainsUCS4_BMP( return __Pyx_PyUnicodeBufferContainsUCS4_BMP(
PyUnicode_AS_UNICODE(unicode), __Pyx_PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode), __Pyx_PyUnicode_GET_SIZE(unicode),
character); character);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment