Support surrogate pair to Py_UCS4 coercion only in 16-bit Unicode builds

79f7d7f3 · Stefan Behnel · 1ba2eaea · 79f7d7f3 · 79f7d7f3
Commit 79f7d7f3 authored Jan 31, 2011 by Stefan Behnel
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 4 deletions

Cython/Compiler/PyrexTypes.py +5 -2

tests/run/py_ucs4_type.pyx +2 -2

No files found.
--- a/Cython/Compiler/PyrexTypes.py
+++ b/Cython/Compiler/PyrexTypes.py
@@ -958,7 +958,9 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) {
   if (PyUnicode_Check(x)) {
       if (likely(PyUnicode_GET_SIZE(x) == 1)) {
           return PyUnicode_AS_UNICODE(x)[0];
-       } else if (PyUnicode_GET_SIZE(x) == 2) {
+       }
+       #if Py_UNICODE_SIZE == 2
+       else if (PyUnicode_GET_SIZE(x) == 2) {
           Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0];
           if (high_val >= 0xD800 && high_val <= 0xDBFF) {
               Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1];
@@ -967,8 +969,9 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) {
               }
           }
       }
+       #endif
       PyErr_Format(PyExc_ValueError,
-           "only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length "
+           "only single character unicode strings can be converted to Py_UCS4, got length "
           #if PY_VERSION_HEX < 0x02050000
           "%d",
           #else

--- a/tests/run/py_ucs4_type.pyx
+++ b/tests/run/py_ucs4_type.pyx
@@ -68,13 +68,13 @@ def unicode_ordinal(Py_UCS4 i):
    >>> unicode_ordinal(u0[:0])
    Traceback (most recent call last):
    ...
-    ValueError: only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length 0
+    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 0

    More than one character:
    >>> unicode_ordinal(u0+u1)
    Traceback (most recent call last):
    ...
-    ValueError: only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length 2
+    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2
    """
    return i