Commit 47a00f3d authored by Benjamin Peterson

support encoding error handlers that return bytes (closes #16585)

parent aff47239
...@@ -45,6 +45,10 @@ class Test_MultibyteCodec(unittest.TestCase): ...@@ -45,6 +45,10 @@ class Test_MultibyteCodec(unittest.TestCase):
self.assertRaises(IndexError, dec, self.assertRaises(IndexError, dec,
b'apple\x92ham\x93spam', 'test.cjktest') b'apple\x92ham\x93spam', 'test.cjktest')
def test_errorhandler_returns_bytes(self):
enc = "\u30fb\udc80".encode('gb18030', 'surrogateescape')
self.assertEqual(enc, b'\x819\xa79\x80')
def test_codingspec(self): def test_codingspec(self):
try: try:
for enc in ALL_CJKENCODINGS: for enc in ALL_CJKENCODINGS:
......
...@@ -98,6 +98,9 @@ Core and Builtins ...@@ -98,6 +98,9 @@ Core and Builtins
Library Library
------- -------
- Issue #16585: Make CJK encoders support error handlers that return bytes per
PEP 383.
- Issue #10182: The re module doesn't truncate indices to 32 bits anymore. - Issue #10182: The re module doesn't truncate indices to 32 bits anymore.
Patch by Serhiy Storchaka. Patch by Serhiy Storchaka.
......
...@@ -316,7 +316,7 @@ multibytecodec_encerror(MultibyteCodec *codec, ...@@ -316,7 +316,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
goto errorexit; goto errorexit;
if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) || (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
!PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) { !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"encoding error handler must return " "encoding error handler must return "
...@@ -324,7 +324,7 @@ multibytecodec_encerror(MultibyteCodec *codec, ...@@ -324,7 +324,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
goto errorexit; goto errorexit;
} }
{ if (PyUnicode_Check(tobj)) {
const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj); const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
retstr = multibytecodec_encode(codec, state, &uraw, retstr = multibytecodec_encode(codec, state, &uraw,
...@@ -333,6 +333,10 @@ multibytecodec_encerror(MultibyteCodec *codec, ...@@ -333,6 +333,10 @@ multibytecodec_encerror(MultibyteCodec *codec,
if (retstr == NULL) if (retstr == NULL)
goto errorexit; goto errorexit;
} }
else {
Py_INCREF(tobj);
retstr = tobj;
}
assert(PyBytes_Check(retstr)); assert(PyBytes_Check(retstr));
retstrsize = PyBytes_GET_SIZE(retstr); retstrsize = PyBytes_GET_SIZE(retstr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment