Commit 6c9aa8f2 authored by Victor Stinner

Fix str.translate()

Issue #26464: Fix str.translate() when the string is ASCII and the first
replacement removes a character, but a later replacement uses a non-ASCII
character or a string longer than 1 character. Regression introduced in Python 3.5.0.
parent bb0dbd58
...@@ -347,6 +347,10 @@ class UnicodeTest(string_tests.CommonTest, ...@@ -347,6 +347,10 @@ class UnicodeTest(string_tests.CommonTest,
"[a]") "[a]")
self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': None})), self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': None})),
"[]") "[]")
self.assertEqual('axb'.translate(str.maketrans({'a': None, 'b': '123'})),
"x123")
self.assertEqual('axb'.translate(str.maketrans({'a': None, 'b': '\xe9'})),
"x\xe9")
# invalid Unicode characters # invalid Unicode characters
invalid_char = 0x10ffff+1 invalid_char = 0x10ffff+1
......
...@@ -10,6 +10,10 @@ Release date: tba ...@@ -10,6 +10,10 @@ Release date: tba
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #26464: Fix str.translate() when string is ASCII and first replacements
removes character, but next replacement uses a non-ASCII character or a
string longer than 1 character. Regression introduced in Python 3.5.0.
- Issue #22836: Ensure exception reports from PyErr_Display() and - Issue #22836: Ensure exception reports from PyErr_Display() and
PyErr_WriteUnraisable() are sensible even when formatting them produces PyErr_WriteUnraisable() are sensible even when formatting them produces
secondary errors. This affects the reports produced by secondary errors. This affects the reports produced by
......
...@@ -8574,7 +8574,8 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch, ...@@ -8574,7 +8574,8 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
translated into writer, raise an exception and return -1 on error. */ translated into writer, raise an exception and return -1 on error. */
static int static int
unicode_fast_translate(PyObject *input, PyObject *mapping, unicode_fast_translate(PyObject *input, PyObject *mapping,
_PyUnicodeWriter *writer, int ignore) _PyUnicodeWriter *writer, int ignore,
Py_ssize_t *input_pos)
{ {
Py_UCS1 ascii_table[128], ch, ch2; Py_UCS1 ascii_table[128], ch, ch2;
Py_ssize_t len; Py_ssize_t len;
...@@ -8621,6 +8622,7 @@ unicode_fast_translate(PyObject *input, PyObject *mapping, ...@@ -8621,6 +8622,7 @@ unicode_fast_translate(PyObject *input, PyObject *mapping,
exit: exit:
writer->pos = out - PyUnicode_1BYTE_DATA(writer->buffer); writer->pos = out - PyUnicode_1BYTE_DATA(writer->buffer);
*input_pos = in - PyUnicode_1BYTE_DATA(input);
return res; return res;
} }
...@@ -8666,7 +8668,7 @@ _PyUnicode_TranslateCharmap(PyObject *input, ...@@ -8666,7 +8668,7 @@ _PyUnicode_TranslateCharmap(PyObject *input,
ignore = (errors != NULL && strcmp(errors, "ignore") == 0); ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
res = unicode_fast_translate(input, mapping, &writer, ignore); res = unicode_fast_translate(input, mapping, &writer, ignore, &i);
if (res < 0) { if (res < 0) {
_PyUnicodeWriter_Dealloc(&writer); _PyUnicodeWriter_Dealloc(&writer);
return NULL; return NULL;
...@@ -8674,7 +8676,6 @@ _PyUnicode_TranslateCharmap(PyObject *input, ...@@ -8674,7 +8676,6 @@ _PyUnicode_TranslateCharmap(PyObject *input,
if (res == 1) if (res == 1)
return _PyUnicodeWriter_Finish(&writer); return _PyUnicodeWriter_Finish(&writer);
i = writer.pos;
while (i<size) { while (i<size) {
/* try to encode it */ /* try to encode it */
int translate; int translate;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment