Commit 88d8fb6a authored by Serhiy Storchaka

Issue #13916: Disallowed the surrogatepass error handler for non UTF-* encodings.
parent 8e4efbe1
...@@ -2807,6 +2807,9 @@ class CodePageTest(unittest.TestCase): ...@@ -2807,6 +2807,9 @@ class CodePageTest(unittest.TestCase):
('[\u20ac]', 'replace', b'[?]'), ('[\u20ac]', 'replace', b'[?]'),
('[\xff]', 'backslashreplace', b'[\\xff]'), ('[\xff]', 'backslashreplace', b'[\\xff]'),
('[\xff]', 'xmlcharrefreplace', b'[&#255;]'), ('[\xff]', 'xmlcharrefreplace', b'[&#255;]'),
('\udcff', 'strict', None),
('[\udcff]', 'surrogateescape', b'[\xff]'),
('[\udcff]', 'surrogatepass', None),
)) ))
self.check_decode(932, ( self.check_decode(932, (
(b'abc', 'strict', 'abc'), (b'abc', 'strict', 'abc'),
...@@ -2816,6 +2819,7 @@ class CodePageTest(unittest.TestCase): ...@@ -2816,6 +2819,7 @@ class CodePageTest(unittest.TestCase):
(b'[\xff]', 'ignore', '[]'), (b'[\xff]', 'ignore', '[]'),
(b'[\xff]', 'replace', '[\ufffd]'), (b'[\xff]', 'replace', '[\ufffd]'),
(b'[\xff]', 'surrogateescape', '[\udcff]'), (b'[\xff]', 'surrogateescape', '[\udcff]'),
(b'[\xff]', 'surrogatepass', None),
(b'\x81\x00abc', 'strict', None), (b'\x81\x00abc', 'strict', None),
(b'\x81\x00abc', 'ignore', '\x00abc'), (b'\x81\x00abc', 'ignore', '\x00abc'),
(b'\x81\x00abc', 'replace', '\ufffd\x00abc'), (b'\x81\x00abc', 'replace', '\ufffd\x00abc'),
...@@ -2826,14 +2830,23 @@ class CodePageTest(unittest.TestCase): ...@@ -2826,14 +2830,23 @@ class CodePageTest(unittest.TestCase):
('abc', 'strict', b'abc'), ('abc', 'strict', b'abc'),
('\xe9\u20ac', 'strict', b'\xe9\x80'), ('\xe9\u20ac', 'strict', b'\xe9\x80'),
('\xff', 'strict', b'\xff'), ('\xff', 'strict', b'\xff'),
# test error handlers
('\u0141', 'strict', None), ('\u0141', 'strict', None),
('\u0141', 'ignore', b''), ('\u0141', 'ignore', b''),
('\u0141', 'replace', b'L'), ('\u0141', 'replace', b'L'),
('\udc98', 'surrogateescape', b'\x98'),
('\udc98', 'surrogatepass', None),
)) ))
self.check_decode(1252, ( self.check_decode(1252, (
(b'abc', 'strict', 'abc'), (b'abc', 'strict', 'abc'),
(b'\xe9\x80', 'strict', '\xe9\u20ac'), (b'\xe9\x80', 'strict', '\xe9\u20ac'),
(b'\xff', 'strict', '\xff'), (b'\xff', 'strict', '\xff'),
# invalid bytes
(b'[\x98]', 'strict', None),
(b'[\x98]', 'ignore', '[]'),
(b'[\x98]', 'replace', '[\ufffd]'),
(b'[\x98]', 'surrogateescape', '[\udc98]'),
(b'[\x98]', 'surrogatepass', None),
)) ))
def test_cp_utf7(self): def test_cp_utf7(self):
......
...@@ -84,6 +84,9 @@ Core and Builtins ...@@ -84,6 +84,9 @@ Core and Builtins
Library Library
------- -------
- Issue #13916: Disallowed the surrogatepass error handler for non UTF-*
encodings.
- Issue #20998: Fixed re.fullmatch() of repeated single character pattern - Issue #20998: Fixed re.fullmatch() of repeated single character pattern
with ignore case. Original patch by Matthew Barnett. with ignore case. Original patch by Matthew Barnett.
......
...@@ -901,6 +901,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) ...@@ -901,6 +901,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
} }
} }
#define ENC_UNKNOWN -1
#define ENC_UTF8 0 #define ENC_UTF8 0
#define ENC_UTF16BE 1 #define ENC_UTF16BE 1
#define ENC_UTF16LE 2 #define ENC_UTF16LE 2
...@@ -916,7 +917,11 @@ get_standard_encoding(const char *encoding, int *bytelength) ...@@ -916,7 +917,11 @@ get_standard_encoding(const char *encoding, int *bytelength)
encoding += 3; encoding += 3;
if (*encoding == '-' || *encoding == '_' ) if (*encoding == '-' || *encoding == '_' )
encoding++; encoding++;
if (encoding[0] == '1' && encoding[1] == '6') { if (encoding[0] == '8' && encoding[1] == '\0') {
*bytelength = 3;
return ENC_UTF8;
}
else if (encoding[0] == '1' && encoding[1] == '6') {
encoding += 2; encoding += 2;
*bytelength = 2; *bytelength = 2;
if (*encoding == '\0') { if (*encoding == '\0') {
...@@ -955,9 +960,7 @@ get_standard_encoding(const char *encoding, int *bytelength) ...@@ -955,9 +960,7 @@ get_standard_encoding(const char *encoding, int *bytelength)
} }
} }
} }
/* utf-8 */ return ENC_UNKNOWN;
*bytelength = 3;
return ENC_UTF8;
} }
/* This handler is declared static until someone demonstrates /* This handler is declared static until someone demonstrates
...@@ -994,6 +997,12 @@ PyCodec_SurrogatePassErrors(PyObject *exc) ...@@ -994,6 +997,12 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
} }
code = get_standard_encoding(encoding, &bytelength); code = get_standard_encoding(encoding, &bytelength);
Py_DECREF(encode); Py_DECREF(encode);
if (code == ENC_UNKNOWN) {
/* Not supported, fail with original exception */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
Py_DECREF(object);
return NULL;
}
res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start)); res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start));
if (!res) { if (!res) {
...@@ -1068,6 +1077,12 @@ PyCodec_SurrogatePassErrors(PyObject *exc) ...@@ -1068,6 +1077,12 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
} }
code = get_standard_encoding(encoding, &bytelength); code = get_standard_encoding(encoding, &bytelength);
Py_DECREF(encode); Py_DECREF(encode);
if (code == ENC_UNKNOWN) {
/* Not supported, fail with original exception */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
Py_DECREF(object);
return NULL;
}
/* Try decoding a single surrogate character. If /* Try decoding a single surrogate character. If
there are more, let the codec call us again. */ there are more, let the codec call us again. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment