Fix PyUnicode_Substring() for start >= length and start > end

Remove the fast-path for 1-character strings: unicode_fromascii() and _PyUnicode_FromUCS*() now have their own fast-paths for 1-character strings.

Fix PyUnicode_Substring() for start >= length and start > end
Remove the fast-path for 1-character strings: unicode_fromascii() and _PyUnicode_FromUCS*() now have their own fast-paths for 1-character strings.
684d5fd4 · Victor Stinner · b6cd014d · 684d5fd4
Commit 684d5fd4 authored May 03, 2012 by Victor Stinner
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 6 deletions

Objects/unicodeobject.c Objects/unicodeobject.c +8 -6

No files found.
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12067,20 +12067,22 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
    if (PyUnicode_READY(self) == -1)
        return NULL;

-    end = Py_MIN(end, PyUnicode_GET_LENGTH(self));
+    length = PyUnicode_GET_LENGTH(self);
+    end = Py_MIN(end, length);

-    if (start == 0 && end == PyUnicode_GET_LENGTH(self))
+    if (start == 0 && end == length)
        return unicode_result_unchanged(self);

-    length = end - start;
-    if (length == 1)
-        return unicode_getitem(self, start);
-
    if (start < 0 || end < 0) {
        PyErr_SetString(PyExc_IndexError, "string index out of range");
        return NULL;
    }
+    if (start >= length || end < start) {
+        assert(end == length);
+        return PyUnicode_New(0, 0);
+    }

+    length = end - start;
    if (PyUnicode_IS_ASCII(self)) {
        data = PyUnicode_1BYTE_DATA(self);
        return unicode_fromascii(data + start, length);