Commit 7e303731 authored by Benjamin Peterson

remove MAX_MAXCHAR because it's unsafe for computing maximum codepoint value (see #18183)

parent 0e547b66
...@@ -566,6 +566,9 @@ class UnicodeTest(string_tests.CommonTest, ...@@ -566,6 +566,9 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('\U0008fffe'.lower(), '\U0008fffe') self.assertEqual('\U0008fffe'.lower(), '\U0008fffe')
self.assertEqual('\u2177'.lower(), '\u2177') self.assertEqual('\u2177'.lower(), '\u2177')
# See issue #18183 for this one.
'\U00010000\U00100000'.lower()
def test_casefold(self): def test_casefold(self):
self.assertEqual('hello'.casefold(), 'hello') self.assertEqual('hello'.casefold(), 'hello')
self.assertEqual('hELlo'.casefold(), 'hello') self.assertEqual('hELlo'.casefold(), 'hello')
......
...@@ -12,6 +12,9 @@ What's New in Python 3.3.3 release candidate 1? ...@@ -12,6 +12,9 @@ What's New in Python 3.3.3 release candidate 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #18183: Fix various unicode operations on strings with large unicode
codepoints.
- Issue #18180: Fix ref leak in _PyImport_GetDynLoadWindows(). - Issue #18180: Fix ref leak in _PyImport_GetDynLoadWindows().
- Issue #18038: SyntaxError raised during compilation sources with illegal - Issue #18038: SyntaxError raised during compilation sources with illegal
......
...@@ -112,11 +112,6 @@ extern "C" { ...@@ -112,11 +112,6 @@ extern "C" {
#define _PyUnicode_DATA_ANY(op) \ #define _PyUnicode_DATA_ANY(op) \
(((PyUnicodeObject*)(op))->data.any) (((PyUnicodeObject*)(op))->data.any)
/* Optimized version of Py_MAX() to compute the maximum character:
use it when your are computing the second argument of PyUnicode_New() */
#define MAX_MAXCHAR(maxchar1, maxchar2) \
((maxchar1) | (maxchar2))
#undef PyUnicode_READY #undef PyUnicode_READY
#define PyUnicode_READY(op) \ #define PyUnicode_READY(op) \
(assert(_PyUnicode_CHECK(op)), \ (assert(_PyUnicode_CHECK(op)), \
...@@ -2495,7 +2490,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -2495,7 +2490,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
case 'c': case 'c':
{ {
Py_UCS4 ordinal = va_arg(count, int); Py_UCS4 ordinal = va_arg(count, int);
maxchar = MAX_MAXCHAR(maxchar, ordinal); maxchar = Py_MAX(maxchar, ordinal);
n++; n++;
break; break;
} }
...@@ -2591,7 +2586,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -2591,7 +2586,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
/* since PyUnicode_DecodeUTF8 returns already flexible /* since PyUnicode_DecodeUTF8 returns already flexible
unicode objects, there is no need to call ready on them */ unicode objects, there is no need to call ready on them */
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str); argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
maxchar = MAX_MAXCHAR(maxchar, argmaxchar); maxchar = Py_MAX(maxchar, argmaxchar);
n += PyUnicode_GET_LENGTH(str); n += PyUnicode_GET_LENGTH(str);
/* Remember the str and switch to the next slot */ /* Remember the str and switch to the next slot */
*callresult++ = str; *callresult++ = str;
...@@ -2604,7 +2599,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -2604,7 +2599,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
if (PyUnicode_READY(obj) == -1) if (PyUnicode_READY(obj) == -1)
goto fail; goto fail;
argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj); argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
maxchar = MAX_MAXCHAR(maxchar, argmaxchar); maxchar = Py_MAX(maxchar, argmaxchar);
n += PyUnicode_GET_LENGTH(obj); n += PyUnicode_GET_LENGTH(obj);
break; break;
} }
...@@ -2619,7 +2614,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -2619,7 +2614,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
if (PyUnicode_READY(obj) == -1) if (PyUnicode_READY(obj) == -1)
goto fail; goto fail;
argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj); argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
maxchar = MAX_MAXCHAR(maxchar, argmaxchar); maxchar = Py_MAX(maxchar, argmaxchar);
n += PyUnicode_GET_LENGTH(obj); n += PyUnicode_GET_LENGTH(obj);
*callresult++ = NULL; *callresult++ = NULL;
} }
...@@ -2632,7 +2627,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -2632,7 +2627,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
goto fail; goto fail;
} }
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj); argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj);
maxchar = MAX_MAXCHAR(maxchar, argmaxchar); maxchar = Py_MAX(maxchar, argmaxchar);
n += PyUnicode_GET_LENGTH(str_obj); n += PyUnicode_GET_LENGTH(str_obj);
*callresult++ = str_obj; *callresult++ = str_obj;
} }
...@@ -2651,7 +2646,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -2651,7 +2646,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
goto fail; goto fail;
} }
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str); argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
maxchar = MAX_MAXCHAR(maxchar, argmaxchar); maxchar = Py_MAX(maxchar, argmaxchar);
n += PyUnicode_GET_LENGTH(str); n += PyUnicode_GET_LENGTH(str);
/* Remember the str and switch to the next slot */ /* Remember the str and switch to the next slot */
*callresult++ = str; *callresult++ = str;
...@@ -2670,7 +2665,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -2670,7 +2665,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
goto fail; goto fail;
} }
argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr); argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr);
maxchar = MAX_MAXCHAR(maxchar, argmaxchar); maxchar = Py_MAX(maxchar, argmaxchar);
n += PyUnicode_GET_LENGTH(repr); n += PyUnicode_GET_LENGTH(repr);
/* Remember the repr and switch to the next slot */ /* Remember the repr and switch to the next slot */
*callresult++ = repr; *callresult++ = repr;
...@@ -2689,7 +2684,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -2689,7 +2684,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
goto fail; goto fail;
} }
argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii); argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii);
maxchar = MAX_MAXCHAR(maxchar, argmaxchar); maxchar = Py_MAX(maxchar, argmaxchar);
n += PyUnicode_GET_LENGTH(ascii); n += PyUnicode_GET_LENGTH(ascii);
/* Remember the repr and switch to the next slot */ /* Remember the repr and switch to the next slot */
*callresult++ = ascii; *callresult++ = ascii;
...@@ -8628,11 +8623,11 @@ fix_decimal_and_space_to_ascii(PyObject *self) ...@@ -8628,11 +8623,11 @@ fix_decimal_and_space_to_ascii(PyObject *self)
} }
if (fixed != 0) { if (fixed != 0) {
modified = 1; modified = 1;
maxchar = MAX_MAXCHAR(maxchar, fixed); maxchar = Py_MAX(maxchar, fixed);
PyUnicode_WRITE(kind, data, i, fixed); PyUnicode_WRITE(kind, data, i, fixed);
} }
else else
maxchar = MAX_MAXCHAR(maxchar, ch); maxchar = Py_MAX(maxchar, ch);
} }
} }
...@@ -8673,7 +8668,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, ...@@ -8673,7 +8668,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
int decimal = Py_UNICODE_TODECIMAL(ch); int decimal = Py_UNICODE_TODECIMAL(ch);
if (decimal >= 0) if (decimal >= 0)
ch = '0' + decimal; ch = '0' + decimal;
maxchar = MAX_MAXCHAR(maxchar, ch); maxchar = Py_MAX(maxchar, ch);
} }
} }
...@@ -8914,7 +8909,7 @@ _PyUnicode_InsertThousandsGrouping( ...@@ -8914,7 +8909,7 @@ _PyUnicode_InsertThousandsGrouping(
if (unicode == NULL) { if (unicode == NULL) {
*maxchar = 127; *maxchar = 127;
if (len != n_digits) { if (len != n_digits) {
*maxchar = MAX_MAXCHAR(*maxchar, *maxchar = Py_MAX(*maxchar,
PyUnicode_MAX_CHAR_VALUE(thousands_sep)); PyUnicode_MAX_CHAR_VALUE(thousands_sep));
} }
} }
...@@ -9309,14 +9304,14 @@ do_capitalize(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *ma ...@@ -9309,14 +9304,14 @@ do_capitalize(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *ma
c = PyUnicode_READ(kind, data, 0); c = PyUnicode_READ(kind, data, 0);
n_res = _PyUnicode_ToUpperFull(c, mapped); n_res = _PyUnicode_ToUpperFull(c, mapped);
for (j = 0; j < n_res; j++) { for (j = 0; j < n_res; j++) {
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]); *maxchar = Py_MAX(*maxchar, mapped[j]);
res[k++] = mapped[j]; res[k++] = mapped[j];
} }
for (i = 1; i < length; i++) { for (i = 1; i < length; i++) {
c = PyUnicode_READ(kind, data, i); c = PyUnicode_READ(kind, data, i);
n_res = lower_ucs4(kind, data, length, i, c, mapped); n_res = lower_ucs4(kind, data, length, i, c, mapped);
for (j = 0; j < n_res; j++) { for (j = 0; j < n_res; j++) {
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]); *maxchar = Py_MAX(*maxchar, mapped[j]);
res[k++] = mapped[j]; res[k++] = mapped[j];
} }
} }
...@@ -9341,7 +9336,7 @@ do_swapcase(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc ...@@ -9341,7 +9336,7 @@ do_swapcase(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc
mapped[0] = c; mapped[0] = c;
} }
for (j = 0; j < n_res; j++) { for (j = 0; j < n_res; j++) {
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]); *maxchar = Py_MAX(*maxchar, mapped[j]);
res[k++] = mapped[j]; res[k++] = mapped[j];
} }
} }
...@@ -9362,7 +9357,7 @@ do_upper_or_lower(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, ...@@ -9362,7 +9357,7 @@ do_upper_or_lower(int kind, void *data, Py_ssize_t length, Py_UCS4 *res,
else else
n_res = _PyUnicode_ToUpperFull(c, mapped); n_res = _PyUnicode_ToUpperFull(c, mapped);
for (j = 0; j < n_res; j++) { for (j = 0; j < n_res; j++) {
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]); *maxchar = Py_MAX(*maxchar, mapped[j]);
res[k++] = mapped[j]; res[k++] = mapped[j];
} }
} }
...@@ -9391,7 +9386,7 @@ do_casefold(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc ...@@ -9391,7 +9386,7 @@ do_casefold(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc
Py_UCS4 mapped[3]; Py_UCS4 mapped[3];
int j, n_res = _PyUnicode_ToFoldedFull(c, mapped); int j, n_res = _PyUnicode_ToFoldedFull(c, mapped);
for (j = 0; j < n_res; j++) { for (j = 0; j < n_res; j++) {
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]); *maxchar = Py_MAX(*maxchar, mapped[j]);
res[k++] = mapped[j]; res[k++] = mapped[j];
} }
} }
...@@ -9416,7 +9411,7 @@ do_title(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar ...@@ -9416,7 +9411,7 @@ do_title(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar
n_res = _PyUnicode_ToTitleFull(c, mapped); n_res = _PyUnicode_ToTitleFull(c, mapped);
for (j = 0; j < n_res; j++) { for (j = 0; j < n_res; j++) {
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]); *maxchar = Py_MAX(*maxchar, mapped[j]);
res[k++] = mapped[j]; res[k++] = mapped[j];
} }
...@@ -9571,7 +9566,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) ...@@ -9571,7 +9566,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
goto onError; goto onError;
sz += PyUnicode_GET_LENGTH(item); sz += PyUnicode_GET_LENGTH(item);
item_maxchar = PyUnicode_MAX_CHAR_VALUE(item); item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);
maxchar = MAX_MAXCHAR(maxchar, item_maxchar); maxchar = Py_MAX(maxchar, item_maxchar);
if (i != 0) if (i != 0)
sz += seplen; sz += seplen;
if (sz < old_sz || sz > PY_SSIZE_T_MAX) { if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
...@@ -9747,7 +9742,7 @@ pad(PyObject *self, ...@@ -9747,7 +9742,7 @@ pad(PyObject *self,
return NULL; return NULL;
} }
maxchar = PyUnicode_MAX_CHAR_VALUE(self); maxchar = PyUnicode_MAX_CHAR_VALUE(self);
maxchar = MAX_MAXCHAR(maxchar, fill); maxchar = Py_MAX(maxchar, fill);
u = PyUnicode_New(left + _PyUnicode_LENGTH(self) + right, maxchar); u = PyUnicode_New(left + _PyUnicode_LENGTH(self) + right, maxchar);
if (!u) if (!u)
return NULL; return NULL;
...@@ -10061,7 +10056,7 @@ replace(PyObject *self, PyObject *str1, ...@@ -10061,7 +10056,7 @@ replace(PyObject *self, PyObject *str1,
/* Replacing str1 with str2 may cause a maxchar reduction in the /* Replacing str1 with str2 may cause a maxchar reduction in the
result string. */ result string. */
mayshrink = (maxchar_str2 < maxchar); mayshrink = (maxchar_str2 < maxchar);
maxchar = MAX_MAXCHAR(maxchar, maxchar_str2); maxchar = Py_MAX(maxchar, maxchar_str2);
if (len1 == len2) { if (len1 == len2) {
/* same length */ /* same length */
...@@ -10647,7 +10642,7 @@ PyUnicode_Concat(PyObject *left, PyObject *right) ...@@ -10647,7 +10642,7 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
maxchar = PyUnicode_MAX_CHAR_VALUE(u); maxchar = PyUnicode_MAX_CHAR_VALUE(u);
maxchar2 = PyUnicode_MAX_CHAR_VALUE(v); maxchar2 = PyUnicode_MAX_CHAR_VALUE(v);
maxchar = MAX_MAXCHAR(maxchar, maxchar2); maxchar = Py_MAX(maxchar, maxchar2);
/* Concat the two Unicode strings */ /* Concat the two Unicode strings */
w = PyUnicode_New(new_len, maxchar); w = PyUnicode_New(new_len, maxchar);
...@@ -10734,7 +10729,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) ...@@ -10734,7 +10729,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
else { else {
maxchar = PyUnicode_MAX_CHAR_VALUE(left); maxchar = PyUnicode_MAX_CHAR_VALUE(left);
maxchar2 = PyUnicode_MAX_CHAR_VALUE(right); maxchar2 = PyUnicode_MAX_CHAR_VALUE(right);
maxchar = MAX_MAXCHAR(maxchar, maxchar2); maxchar = Py_MAX(maxchar, maxchar2);
/* Concat the two Unicode strings */ /* Concat the two Unicode strings */
res = PyUnicode_New(new_len, maxchar); res = PyUnicode_New(new_len, maxchar);
...@@ -13846,15 +13841,15 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -13846,15 +13841,15 @@ PyUnicode_Format(PyObject *format, PyObject *args)
if (!(flags & F_LJUST)) { if (!(flags & F_LJUST)) {
if (sign) { if (sign) {
if ((width-1) > len) if ((width-1) > len)
bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); bufmaxchar = Py_MAX(bufmaxchar, fill);
} }
else { else {
if (width > len) if (width > len)
bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); bufmaxchar = Py_MAX(bufmaxchar, fill);
} }
} }
maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len); maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar); bufmaxchar = Py_MAX(bufmaxchar, maxchar);
buflen = width; buflen = width;
if (sign && len == width) if (sign && len == width)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment