Commit a1c1be4e authored by Benjamin Peterson

Clean up overflow handling in unicode_decode_call_errorhandler and...

Clean up overflow handling in unicode_decode_call_errorhandler and unicode_encode_ucs1 (closes #22518)
parent e025b52d
...@@ -4168,9 +4168,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, ...@@ -4168,9 +4168,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
at the new input position), so we won't have to check space at the new input position), so we won't have to check space
when there are no errors in the rest of the string) */ when there are no errors in the rest of the string) */
Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode); Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
requiredsize = *outpos + replen + insize-newpos; requiredsize = *outpos;
if (requiredsize > PY_SSIZE_T_MAX - replen)
goto overflow;
requiredsize += replen;
if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
goto overflow;
requiredsize += insize - newpos;
if (requiredsize > outsize) { if (requiredsize > outsize) {
if (requiredsize<2*outsize) if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
requiredsize = 2*outsize; requiredsize = 2*outsize;
if (unicode_resize(output, requiredsize) < 0) if (unicode_resize(output, requiredsize) < 0)
goto onError; goto onError;
...@@ -4191,9 +4197,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, ...@@ -4191,9 +4197,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
have+the replacement+the rest of the string (starting have+the replacement+the rest of the string (starting
at the new input position), so we won't have to check space at the new input position), so we won't have to check space
when there are no errors in the rest of the string) */ when there are no errors in the rest of the string) */
requiredsize = *outpos + repwlen + insize-newpos; requiredsize = *outpos;
if (requiredsize > PY_SSIZE_T_MAX - repwlen)
goto overflow;
requiredsize += repwlen;
if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
goto overflow;
requiredsize += insize - newpos;
if (requiredsize > outsize) { if (requiredsize > outsize) {
if (requiredsize < 2*outsize) if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
requiredsize = 2*outsize; requiredsize = 2*outsize;
if (unicode_resize(output, requiredsize) < 0) if (unicode_resize(output, requiredsize) < 0)
goto onError; goto onError;
...@@ -4210,6 +4222,11 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, ...@@ -4210,6 +4222,11 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
onError: onError:
Py_XDECREF(restuple); Py_XDECREF(restuple);
return res; return res;
overflow:
PyErr_SetString(PyExc_OverflowError,
"decoded result is too long for a Python string");
goto onError;
} }
/* --- UTF-7 Codec -------------------------------------------------------- */ /* --- UTF-7 Codec -------------------------------------------------------- */
...@@ -6358,7 +6375,7 @@ unicode_encode_ucs1(PyObject *unicode, ...@@ -6358,7 +6375,7 @@ unicode_encode_ucs1(PyObject *unicode,
Py_ssize_t collstart = pos; Py_ssize_t collstart = pos;
Py_ssize_t collend = pos; Py_ssize_t collend = pos;
/* find all unencodable characters */ /* find all unencodable characters */
while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit)) while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit))
++collend; ++collend;
/* cache callback name lookup (if not done yet, i.e. it's the first error) */ /* cache callback name lookup (if not done yet, i.e. it's the first error) */
if (known_errorHandler==-1) { if (known_errorHandler==-1) {
...@@ -6378,36 +6395,43 @@ unicode_encode_ucs1(PyObject *unicode, ...@@ -6378,36 +6395,43 @@ unicode_encode_ucs1(PyObject *unicode,
raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason); raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason);
goto onError; goto onError;
case 2: /* replace */ case 2: /* replace */
while (collstart++<collend) while (collstart++ < collend)
*str++ = '?'; /* fall through */ *str++ = '?'; /* fall through */
case 3: /* ignore */ case 3: /* ignore */
pos = collend; pos = collend;
break; break;
case 4: /* xmlcharrefreplace */ case 4: /* xmlcharrefreplace */
respos = str - PyBytes_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
requiredsize = respos;
/* determine replacement size */ /* determine replacement size */
for (i = collstart, repsize = 0; i < collend; ++i) { for (i = collstart; i < collend; ++i) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i); Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Py_ssize_t incr;
if (ch < 10) if (ch < 10)
repsize += 2+1+1; incr = 2+1+1;
else if (ch < 100) else if (ch < 100)
repsize += 2+2+1; incr = 2+2+1;
else if (ch < 1000) else if (ch < 1000)
repsize += 2+3+1; incr = 2+3+1;
else if (ch < 10000) else if (ch < 10000)
repsize += 2+4+1; incr = 2+4+1;
else if (ch < 100000) else if (ch < 100000)
repsize += 2+5+1; incr = 2+5+1;
else if (ch < 1000000) else if (ch < 1000000)
repsize += 2+6+1; incr = 2+6+1;
else { else {
assert(ch <= MAX_UNICODE); assert(ch <= MAX_UNICODE);
repsize += 2+7+1; incr = 2+7+1;
} }
if (requiredsize > PY_SSIZE_T_MAX - incr)
goto overflow;
requiredsize += incr;
} }
requiredsize = respos+repsize+(size-collend); if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
goto overflow;
requiredsize += size - collend;
if (requiredsize > ressize) { if (requiredsize > ressize) {
if (requiredsize<2*ressize) if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
requiredsize = 2*ressize; requiredsize = 2*ressize;
if (_PyBytes_Resize(&res, requiredsize)) if (_PyBytes_Resize(&res, requiredsize))
goto onError; goto onError;
...@@ -6433,6 +6457,10 @@ unicode_encode_ucs1(PyObject *unicode, ...@@ -6433,6 +6457,10 @@ unicode_encode_ucs1(PyObject *unicode,
if (repsize > 1) { if (repsize > 1) {
/* Make room for all additional bytes. */ /* Make room for all additional bytes. */
respos = str - PyBytes_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
if (ressize > PY_SSIZE_T_MAX - repsize - 1) {
Py_DECREF(repunicode);
goto overflow;
}
if (_PyBytes_Resize(&res, ressize+repsize-1)) { if (_PyBytes_Resize(&res, ressize+repsize-1)) {
Py_DECREF(repunicode); Py_DECREF(repunicode);
goto onError; goto onError;
...@@ -6451,9 +6479,15 @@ unicode_encode_ucs1(PyObject *unicode, ...@@ -6451,9 +6479,15 @@ unicode_encode_ucs1(PyObject *unicode,
we won't have to check space for encodable characters) */ we won't have to check space for encodable characters) */
respos = str - PyBytes_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
repsize = PyUnicode_GET_LENGTH(repunicode); repsize = PyUnicode_GET_LENGTH(repunicode);
requiredsize = respos+repsize+(size-collend); requiredsize = respos;
if (requiredsize > PY_SSIZE_T_MAX - repsize)
goto overflow;
requiredsize += repsize;
if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
goto overflow;
requiredsize += size - collend;
if (requiredsize > ressize) { if (requiredsize > ressize) {
if (requiredsize<2*ressize) if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
requiredsize = 2*ressize; requiredsize = 2*ressize;
if (_PyBytes_Resize(&res, requiredsize)) { if (_PyBytes_Resize(&res, requiredsize)) {
Py_DECREF(repunicode); Py_DECREF(repunicode);
...@@ -6491,6 +6525,10 @@ unicode_encode_ucs1(PyObject *unicode, ...@@ -6491,6 +6525,10 @@ unicode_encode_ucs1(PyObject *unicode,
Py_XDECREF(exc); Py_XDECREF(exc);
return res; return res;
overflow:
PyErr_SetString(PyExc_OverflowError,
"encoded result is too long for a Python string");
onError: onError:
Py_XDECREF(res); Py_XDECREF(res);
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment