Commit ae864855 authored by Victor Stinner

Speed up find_maxchar_surrogates() for 32-bit wchar_t

If at least one character is in the range U+10000-U+10FFFF, we already know
that the PyUnicode_4BYTE_KIND kind must be used, so the scan can stop early
instead of examining the rest of the buffer.
parent b9275c10
...@@ -1060,19 +1060,17 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, ...@@ -1060,19 +1060,17 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
const wchar_t *iter; const wchar_t *iter;
assert(num_surrogates != NULL && maxchar != NULL); assert(num_surrogates != NULL && maxchar != NULL);
if (num_surrogates == NULL || maxchar == NULL) {
PyErr_SetString(PyExc_SystemError,
"unexpected NULL arguments to "
"PyUnicode_FindMaxCharAndNumSurrogatePairs");
return -1;
}
*num_surrogates = 0; *num_surrogates = 0;
*maxchar = 0; *maxchar = 0;
for (iter = begin; iter < end; ) { for (iter = begin; iter < end; ) {
if (*iter > *maxchar) if (*iter > *maxchar) {
*maxchar = *iter; *maxchar = *iter;
#if SIZEOF_WCHAR_T != 2
if (*maxchar >= 0x10000)
return 0;
#endif
}
#if SIZEOF_WCHAR_T == 2 #if SIZEOF_WCHAR_T == 2
if (*iter >= 0xD800 && *iter <= 0xDBFF if (*iter >= 0xD800 && *iter <= 0xDBFF
&& (iter+1) < end && iter[1] >= 0xDC00 && iter[1] <= 0xDFFF) && (iter+1) < end && iter[1] >= 0xDC00 && iter[1] <= 0xDFFF)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment