Move in-place Unicode append to its own subfunction

b092365c · Victor Stinner · a5f91635 · b092365c
Commit b092365c authored Oct 04, 2011 by Victor Stinner
Show whitespace changes
Inline Side-by-side

Showing with 54 additions and 38 deletions

Objects/unicodeobject.c Objects/unicodeobject.c +54 -38

No files found.
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9967,43 +9967,18 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
    return NULL;
 }

-void
-PyUnicode_Append(PyObject **p_left, PyObject *right)
+static void
+unicode_append_inplace(PyObject **p_left, PyObject *right)
 {
-    PyObject *left, *res;
-
-    if (p_left == NULL) {
-        if (!PyErr_Occurred())
-            PyErr_BadInternalCall();
-        return;
-    }
-    left = *p_left;
-    if (right == NULL || !PyUnicode_Check(left)) {
-        if (!PyErr_Occurred())
-            PyErr_BadInternalCall();
-        goto error;
-    }
-
-    if (PyUnicode_CheckExact(left) && left != unicode_empty
-        && PyUnicode_CheckExact(right) && right != unicode_empty
-        && unicode_resizable(left)
-        && (_PyUnicode_KIND(right) <= _PyUnicode_KIND(left)
-            || _PyUnicode_WSTR(left) != NULL))
-    {
    Py_ssize_t left_len, right_len, new_len;
 #ifdef Py_DEBUG
    Py_ssize_t copied;
 #endif

-        if (PyUnicode_READY(left))
-            goto error;
-        if (PyUnicode_READY(right))
-            goto error;
+    assert(PyUnicode_IS_READY(*p_left));
+    assert(PyUnicode_IS_READY(right));

-        /* FIXME: support ascii+latin1, PyASCIIObject => PyCompactUnicodeObject */
-        if (PyUnicode_MAX_CHAR_VALUE(right) <= PyUnicode_MAX_CHAR_VALUE(left))
-        {
-            left_len = PyUnicode_GET_LENGTH(left);
+    left_len = PyUnicode_GET_LENGTH(*p_left);
    right_len = PyUnicode_GET_LENGTH(right);
    if (left_len > PY_SSIZE_T_MAX - right_len) {
        PyErr_SetString(PyExc_OverflowError,
@@ -10015,7 +9990,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
    /* Now we own the last reference to 'left', so we can resize it
     * in-place.
     */
-            if (unicode_resize(&left, new_len) != 0) {
+    if (unicode_resize(p_left, new_len) != 0) {
        /* XXX if _PyUnicode_Resize() fails, 'left' has been
         * deallocated so it cannot be put back into
         * 'variable'.  The MemoryError is raised when there
@@ -10026,14 +10001,55 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
    }
    /* copy 'right' into the newly allocated area of 'left' */
 #ifdef Py_DEBUG
-            copied = PyUnicode_CopyCharacters(left, left_len,
+    copied = PyUnicode_CopyCharacters(*p_left, left_len,
                                      right, 0,
                                      right_len);
    assert(0 <= copied);
 #else
-            PyUnicode_CopyCharacters(left, left_len, right, 0, right_len);
+    PyUnicode_CopyCharacters(*p_left, left_len, right, 0, right_len);
 #endif
-            *p_left = left;
+    return;
+
+error:
+    Py_DECREF(*p_left);
+    *p_left = NULL;
+}
+
+void
+PyUnicode_Append(PyObject **p_left, PyObject *right)
+{
+    PyObject *left, *res;
+
+    if (p_left == NULL) {
+        if (!PyErr_Occurred())
+            PyErr_BadInternalCall();
+        return;
+    }
+    left = *p_left;
+    if (right == NULL || !PyUnicode_Check(left)) {
+        if (!PyErr_Occurred())
+            PyErr_BadInternalCall();
+        goto error;
+    }
+
+    if (PyUnicode_CheckExact(left) && left != unicode_empty
+        && PyUnicode_CheckExact(right) && right != unicode_empty
+        && unicode_resizable(left)
+        && (_PyUnicode_KIND(right) <= _PyUnicode_KIND(left)
+            || _PyUnicode_WSTR(left) != NULL))
+    {
+        if (PyUnicode_READY(left))
+            goto error;
+        if (PyUnicode_READY(right))
+            goto error;
+
+        /* Don't resize for ascii += latin1. Converting ascii to latin1
+           requires changing the structure size, but characters are stored
+           just after the structure, so all characters would have to be moved,
+           which is not so different from duplicating the string. */
+        if (!(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right)))
+        {
+            unicode_append_inplace(p_left, right);
            return;
        }
    }