Commit 79f7d7f3 authored by Stefan Behnel

support surrogate pair to Py_UCS4 coercion only in 16 bit Unicode builds

parent 1ba2eaea
...@@ -958,7 +958,9 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) { ...@@ -958,7 +958,9 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) {
if (PyUnicode_Check(x)) { if (PyUnicode_Check(x)) {
if (likely(PyUnicode_GET_SIZE(x) == 1)) { if (likely(PyUnicode_GET_SIZE(x) == 1)) {
return PyUnicode_AS_UNICODE(x)[0]; return PyUnicode_AS_UNICODE(x)[0];
} else if (PyUnicode_GET_SIZE(x) == 2) { }
#if Py_UNICODE_SIZE == 2
else if (PyUnicode_GET_SIZE(x) == 2) {
Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0]; Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0];
if (high_val >= 0xD800 && high_val <= 0xDBFF) { if (high_val >= 0xD800 && high_val <= 0xDBFF) {
Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1]; Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1];
...@@ -967,8 +969,9 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) { ...@@ -967,8 +969,9 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) {
} }
} }
} }
#endif
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length " "only single character unicode strings can be converted to Py_UCS4, got length "
#if PY_VERSION_HEX < 0x02050000 #if PY_VERSION_HEX < 0x02050000
"%d", "%d",
#else #else
......
...@@ -68,13 +68,13 @@ def unicode_ordinal(Py_UCS4 i): ...@@ -68,13 +68,13 @@ def unicode_ordinal(Py_UCS4 i):
>>> unicode_ordinal(u0[:0]) >>> unicode_ordinal(u0[:0])
Traceback (most recent call last): Traceback (most recent call last):
... ...
ValueError: only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length 0 ValueError: only single character unicode strings can be converted to Py_UCS4, got length 0
More than one character: More than one character:
>>> unicode_ordinal(u0+u1) >>> unicode_ordinal(u0+u1)
Traceback (most recent call last): Traceback (most recent call last):
... ...
ValueError: only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length 2 ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2
""" """
return i return i
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment