Commit d48ba0bd authored by Victor Stinner

(Merge 3.1) Issue #12100: Don't reset incremental encoders of CJK codecs at
each call to their encode() method anymore, but continue to call the reset()
method if the final argument is True.
parents e6a06217 6bcbef7d
...@@ -256,6 +256,36 @@ class Test_ISO2022(unittest.TestCase): ...@@ -256,6 +256,36 @@ class Test_ISO2022(unittest.TestCase):
# Any ISO 2022 codec will cause the segfault # Any ISO 2022 codec will cause the segfault
myunichr(x).encode('iso_2022_jp', 'ignore') myunichr(x).encode('iso_2022_jp', 'ignore')
class TestStateful(unittest.TestCase):
    """Check when an incremental encoder emits its trailing reset sequence.

    The class attributes describe one codec; subclasses override them to
    run the same checks against another encoding.
    """

    # Input text fed to the encoder, whole or one character at a time.
    text = '\u4E16\u4E16'
    # Name of the codec under test.
    encoding = 'iso-2022-jp'
    # Encoded bytes when the encoder is never told the input is final.
    expected = b'\x1b$B@$@$'
    # Encoded bytes when encoding is finalized; must start with `expected`.
    expected_reset = b'\x1b$B@$@$\x1b(B'

    def test_encode(self):
        """One-shot str.encode() yields the finalized form."""
        self.assertEqual(self.text.encode(self.encoding), self.expected_reset)

    def test_incrementalencoder(self):
        """Non-final encode() calls must not emit the reset sequence."""
        encoder = codecs.getincrementalencoder(self.encoding)()
        output = b''.join(
            encoder.encode(char)
            for char in self.text)
        self.assertEqual(output, self.expected)
        # Derive the reset bytes from the two expectations so subclasses
        # only have to override the existing attributes.
        reset = self.expected_reset[len(self.expected):]
        # An empty final call must still flush the pending reset sequence...
        self.assertEqual(encoder.encode('', final=True), reset)
        # ...and a second flush has nothing left to emit.
        self.assertEqual(encoder.encode('', final=True), b'')

    def test_incrementalencoder_final(self):
        """Passing final=True on the last chunk appends the reset sequence."""
        encoder = codecs.getincrementalencoder(self.encoding)()
        last_index = len(self.text) - 1
        output = b''.join(
            encoder.encode(char, index == last_index)
            for index, char in enumerate(self.text))
        self.assertEqual(output, self.expected_reset)
class TestHZStateful(TestStateful):
    """Repeat the TestStateful checks with the 'hz' codec's expectations."""

    # Codec under test.
    encoding = 'hz'

    # Input text for the inherited tests.
    text = '\u804a\u804a'

    # Output without finalization, then output with the trailing b'~}'
    # that appears once encoding is finalized.
    expected = b'~{ADAD'
    expected_reset = b'~{ADAD~}'
def test_main():
    """Entry point: pass this module's name to support.run_unittest()."""
    support.run_unittest(__name__)
......
...@@ -13,6 +13,10 @@ Core and Builtins ...@@ -13,6 +13,10 @@ Core and Builtins
Library Library
------- -------
- Issue #12100: Don't reset incremental encoders of CJK codecs at each call to
their encode() method anymore, but continue to call the reset() method if the
final argument is True.
- Issue #5715: In socketserver, close the server socket in the child process. - Issue #5715: In socketserver, close the server socket in the child process.
- Correct lookup of __dir__ on objects. Among other things, this causes errors - Correct lookup of __dir__ on objects. Among other things, this causes errors
......
...@@ -479,7 +479,7 @@ multibytecodec_encode(MultibyteCodec *codec, ...@@ -479,7 +479,7 @@ multibytecodec_encode(MultibyteCodec *codec,
MultibyteEncodeBuffer buf; MultibyteEncodeBuffer buf;
Py_ssize_t finalsize, r = 0; Py_ssize_t finalsize, r = 0;
if (datalen == 0) if (datalen == 0 && !(flags & MBENC_RESET))
return PyBytes_FromStringAndSize(NULL, 0); return PyBytes_FromStringAndSize(NULL, 0);
buf.excobj = NULL; buf.excobj = NULL;
...@@ -514,7 +514,7 @@ multibytecodec_encode(MultibyteCodec *codec, ...@@ -514,7 +514,7 @@ multibytecodec_encode(MultibyteCodec *codec,
break; break;
} }
if (codec->encreset != NULL) if (codec->encreset != NULL && (flags & MBENC_RESET))
for (;;) { for (;;) {
Py_ssize_t outleft; Py_ssize_t outleft;
...@@ -784,8 +784,8 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, ...@@ -784,8 +784,8 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
inbuf_end = inbuf + datalen; inbuf_end = inbuf + datalen;
r = multibytecodec_encode(ctx->codec, &ctx->state, r = multibytecodec_encode(ctx->codec, &ctx->state,
(const Py_UNICODE **)&inbuf, (const Py_UNICODE **)&inbuf, datalen,
datalen, ctx->errors, final ? MBENC_FLUSH : 0); ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
if (r == NULL) { if (r == NULL) {
/* recover the original pending buffer */ /* recover the original pending buffer */
if (origpending > 0) if (origpending > 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment