Commit 03ee12ed authored by Serhiy Storchaka

Issue #17043: The unicode-internal decoder no longer reads past the end of the
input buffer.
parents cf0904ff 3fd4ab35
...@@ -12,6 +12,9 @@ What's New in Python 3.3.1? ...@@ -12,6 +12,9 @@ What's New in Python 3.3.1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #17043: The unicode-internal decoder no longer read past the end of
input buffer.
- Issue #17098: All modules now have __loader__ set even if they pre-exist the - Issue #17098: All modules now have __loader__ set even if they pre-exist the
bootstrapping of importlib. bootstrapping of importlib.
......
...@@ -6103,6 +6103,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, ...@@ -6103,6 +6103,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
while (s < end) { while (s < end) {
Py_UNICODE uch; Py_UNICODE uch;
Py_UCS4 ch; Py_UCS4 ch;
if (end - s < Py_UNICODE_SIZE) {
endinpos = end-starts;
reason = "truncated input";
goto error;
}
/* We copy the raw representation one byte at a time because the /* We copy the raw representation one byte at a time because the
pointer may be unaligned (see test_codeccallbacks). */ pointer may be unaligned (see test_codeccallbacks). */
((char *) &uch)[0] = s[0]; ((char *) &uch)[0] = s[0];
...@@ -6112,37 +6117,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, ...@@ -6112,37 +6117,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
((char *) &uch)[3] = s[3]; ((char *) &uch)[3] = s[3];
#endif #endif
ch = uch; ch = uch;
#ifdef Py_UNICODE_WIDE
/* We have to sanity check the raw data, otherwise doom looms for /* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */ some malformed UCS-4 data. */
if ( if (ch > 0x10ffff) {
#ifdef Py_UNICODE_WIDE endinpos = s - starts + Py_UNICODE_SIZE;
ch > 0x10ffff || reason = "illegal code point (> 0x10FFFF)";
#endif goto error;
end-s < Py_UNICODE_SIZE
)
{
startinpos = s - starts;
if (end-s < Py_UNICODE_SIZE) {
endinpos = end-starts;
reason = "truncated input";
}
else {
endinpos = s - starts + Py_UNICODE_SIZE;
reason = "illegal code point (> 0x10FFFF)";
}
if (unicode_decode_call_errorhandler(
errors, &errorHandler,
"unicode_internal", reason,
&starts, &end, &startinpos, &endinpos, &exc, &s,
&v, &outpos))
goto onError;
continue;
} }
#endif
s += Py_UNICODE_SIZE; s += Py_UNICODE_SIZE;
#ifndef Py_UNICODE_WIDE #ifndef Py_UNICODE_WIDE
if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end) if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
{ {
Py_UNICODE uch2; Py_UNICODE uch2;
((char *) &uch2)[0] = s[0]; ((char *) &uch2)[0] = s[0];
...@@ -6157,6 +6143,16 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, ...@@ -6157,6 +6143,16 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
if (unicode_putchar(&v, &outpos, ch) < 0) if (unicode_putchar(&v, &outpos, ch) < 0)
goto onError; goto onError;
continue;
error:
startinpos = s - starts;
if (unicode_decode_call_errorhandler(
errors, &errorHandler,
"unicode_internal", reason,
&starts, &end, &startinpos, &endinpos, &exc, &s,
&v, &outpos))
goto onError;
} }
if (unicode_resize(&v, outpos) < 0) if (unicode_resize(&v, outpos) < 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment