Commit 79f7d7f3 authored by Stefan Behnel

Support surrogate-pair to Py_UCS4 coercion only in 16-bit Unicode builds

parent 1ba2eaea
......@@ -958,7 +958,9 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) {
if (PyUnicode_Check(x)) {
if (likely(PyUnicode_GET_SIZE(x) == 1)) {
return PyUnicode_AS_UNICODE(x)[0];
} else if (PyUnicode_GET_SIZE(x) == 2) {
}
#if Py_UNICODE_SIZE == 2
else if (PyUnicode_GET_SIZE(x) == 2) {
Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0];
if (high_val >= 0xD800 && high_val <= 0xDBFF) {
Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1];
......@@ -967,8 +969,9 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) {
}
}
}
#endif
PyErr_Format(PyExc_ValueError,
"only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length "
"only single character unicode strings can be converted to Py_UCS4, got length "
#if PY_VERSION_HEX < 0x02050000
"%d",
#else
......
......@@ -68,13 +68,13 @@ def unicode_ordinal(Py_UCS4 i):
>>> unicode_ordinal(u0[:0])
Traceback (most recent call last):
...
ValueError: only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length 0
ValueError: only single character unicode strings can be converted to Py_UCS4, got length 0
More than one character:
>>> unicode_ordinal(u0+u1)
Traceback (most recent call last):
...
ValueError: only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length 2
ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2
"""
return i
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment