Issue #19424: Optimize PyUnicode_CompareWithASCIIString()

Use the fast memcmp() instead of a loop using the slow PyUnicode_READ() macro. strlen() is still necessary to handle Unicode strings containing null bytes.

Issue #19424: Optimize PyUnicode_CompareWithASCIIString()
Use the fast memcmp() instead of a loop using the slow PyUnicode_READ() macro. strlen() is still necessary to handle Unicode strings containing null bytes.
602f7cf0 · Victor Stinner · ab457a21 · 602f7cf0
Commit 602f7cf0 authored Oct 29, 2013 by Victor Stinner
Show whitespace changes
Inline Side-by-side

Showing with 30 additions and 13 deletions

Objects/unicodeobject.c Objects/unicodeobject.c +30 -13

No files found.
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10573,14 +10573,30 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
 {
    Py_ssize_t i;
    int kind;
-    void *data;
    Py_UCS4 chr;
    assert(_PyUnicode_CHECK(uni));
    if (PyUnicode_READY(uni) == -1)
        return -1;
    kind = PyUnicode_KIND(uni);
-    data = PyUnicode_DATA(uni);
+    if (kind == PyUnicode_1BYTE_KIND) {
+        char *data = PyUnicode_1BYTE_DATA(uni);
+        Py_ssize_t len1 = PyUnicode_GET_LENGTH(uni);
+        size_t len, len2 = strlen(str);
+        int cmp;
+        len = Py_MIN(len1, len2);
+        cmp = memcmp(data, str, len);
+        if (cmp != 0)
+            return cmp;
+        if (len1 > len2)
+            return 1; /* uni is longer */
+        if (len2 > len1)
+            return -1; /* str is longer */
+        return 0;
+    }
+    else {
+        void *data = PyUnicode_DATA(uni);
        /* Compare Unicode string and source character set string */
        for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
            if (chr != str[i])
@@ -10592,6 +10608,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
        if (str[i])
            return -1; /* str is longer */
        return 0;
+    }
 }