Issue #25709: Fixed problem with in-place string concatenation and utf-8 cache.

7aa69086 · Serhiy Storchaka · f02de493 · 7aa69086 · 7aa69086 · 7aa69086
Commit 7aa69086 authored Dec 03, 2015 by Serhiy Storchaka
Hide whitespace changes
Inline Side-by-side

Showing 3 changed files with 24 additions and 0 deletions

Lib/test/test_unicode.py +17 -0

Misc/NEWS +2 -0

Objects/unicodeobject.c +5 -0

No files found.
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2672,6 +2672,23 @@ class UnicodeTest(string_tests.CommonTest,
        self.assertTrue(astral >= bmp2)
        self.assertFalse(astral >= astral2)

+    @support.cpython_only
+    def test_pep393_utf8_caching_bug(self):
+        # Issue #25709: Problem with string concatenation and utf-8 cache
+        from _testcapi import getargs_s_hash
+        for k in 0x24, 0xa4, 0x20ac, 0x1f40d:
+            s = ''
+            for i in range(5):
+                # Due to CPython specific optimization the 's' string can be
+                # resized in-place.
+                s += chr(k)
+                # Parsing with the "s#" format code calls indirectly
+                # PyUnicode_AsUTF8AndSize() which creates the UTF-8
+                # encoded string cached in the Unicode object.
+                self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
+                # Check that the second call returns the same result
+                self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
+

 class StringModuleTest(unittest.TestCase):
    def test_formatter_parser(self):

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ Release date: tba
 Core and Builtins
 -----------------

+- Issue #25709: Fixed problem with in-place string concatenation and utf-8 cache.
+
 - Issue #24097: Fixed crash in object.__reduce__() if slot name is freed inside
  __getattr__.


--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -724,6 +724,11 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
    }
    new_size = (struct_size + (length + 1) * char_size);

+    if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
+        PyObject_DEL(_PyUnicode_UTF8(unicode));
+        _PyUnicode_UTF8(unicode) = NULL;
+        _PyUnicode_UTF8_LENGTH(unicode) = 0;
+    }
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(unicode);