Merge heads

4a219a99 · Serhiy Storchaka · a04d6b89 · 645e59d1 · 4a219a99 · 4a219a99
Commit 4a219a99 authored Jan 04, 2014 by Serhiy Storchaka
Showing with 62 additions and 133 deletions

Doc/whatsnew/3.4.rst Doc/whatsnew/3.4.rst +1 -3

Misc/NEWS Misc/NEWS +0 -2

Objects/stringlib/codecs.h Objects/stringlib/codecs.h +0 -87

Objects/unicodeobject.c Objects/unicodeobject.c +61 -41

No files found.
--- a/Doc/whatsnew/3.4.rst
+++ b/Doc/whatsnew/3.4.rst
@@ -1213,9 +1213,7 @@ Other Improvements
 Significant Optimizations
 =========================

-* The UTF-32 decoder is now 3x to 4x faster.  The UTF-32 encoder is now 1.6x
-  to 3.5x faster.  (Contributed by Serhiy Storchaka in :issue:`14625` and
-  :issue:`15027`.)
+* The UTF-32 decoder is now 3x to 4x faster.

 * The cost of hash collisions for sets is now reduced.  Each hash table
  probe now checks a series of consecutive, adjacent key/hash pairs before

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,8 +10,6 @@ Release date: 2014-01-05
 Core and Builtins
 -----------------

- Issue #15027: Rewrite the UTF-32 encoder.  It is now 1.6x to 3.5x faster.
-
 - Issue #17432: Drop UCS2 from names of Unicode functions in python3.def.

 - Issue #19526: Exclude all new API from the stable ABI. Exceptions can be

--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -718,93 +718,6 @@ STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in,
    return len - (end - in + 1);
 #endif
 }
-
-#if STRINGLIB_SIZEOF_CHAR == 1
-# define SWAB4(CH, tmp)  ((CH) << 24) /* high bytes are zero */
-#elif STRINGLIB_SIZEOF_CHAR == 2
-# define SWAB4(CH, tmp)  (tmp = (CH), \
-            ((tmp & 0x00FFu) << 24) + ((tmp & 0xFF00u) << 8))
-            /* high bytes are zero */
-#else
-# define SWAB4(CH, tmp)  (tmp = (CH), \
-            tmp = ((tmp & 0x00FF00FFu) << 8) + ((tmp >> 8) & 0x00FF00FFu), \
-            ((tmp & 0x0000FFFFu) << 16) + ((tmp >> 16) & 0x0000FFFFu))
-#endif
-Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,
-                        Py_ssize_t len,
-                        PY_UINT32_T **outptr,
-                        int native_ordering)
-{
-    PY_UINT32_T *out = *outptr;
-    const STRINGLIB_CHAR *end = in + len;
-    if (native_ordering) {
-        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
-        while (in < unrolled_end) {
-#if STRINGLIB_SIZEOF_CHAR > 1
-            /* check if any character is a surrogate character */
-            if (((in[0] ^ 0xd800) &
-                 (in[1] ^ 0xd800) &
-                 (in[2] ^ 0xd800) &
-                 (in[3] ^ 0xd800) & 0xf800) == 0)
-                break;
-#endif
-            out[0] = in[0];
-            out[1] = in[1];
-            out[2] = in[2];
-            out[3] = in[3];
-            in += 4; out += 4;
-        }
-        while (in < end) {
-            Py_UCS4 ch;
-            ch = *in++;
-#if STRINGLIB_SIZEOF_CHAR > 1
-            if (Py_UNICODE_IS_SURROGATE(ch)) {
-                /* reject surrogate characters (U+D800-U+DFFF) */
-                goto fail;
-            }
-#endif
-            *out++ = ch;
-        }
-    } else {
-        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
-        while (in < unrolled_end) {
-#if STRINGLIB_SIZEOF_CHAR > 1
-            Py_UCS4 ch1, ch2, ch3, ch4;
-            /* check if any character is a surrogate character */
-            if (((in[0] ^ 0xd800) &
-                 (in[1] ^ 0xd800) &
-                 (in[2] ^ 0xd800) &
-                 (in[3] ^ 0xd800) & 0xf800) == 0)
-                break;
-#endif
-            out[0] = SWAB4(in[0], ch1);
-            out[1] = SWAB4(in[1], ch2);
-            out[2] = SWAB4(in[2], ch3);
-            out[3] = SWAB4(in[3], ch4);
-            in += 4; out += 4;
-        }
-        while (in < end) {
-            Py_UCS4 ch = *in++;
-#if STRINGLIB_SIZEOF_CHAR > 1
-            if (Py_UNICODE_IS_SURROGATE(ch)) {
-                /* reject surrogate characters (U+D800-U+DFFF) */
-                goto fail;
-            }
-#endif
-            *out++ = SWAB4(ch, ch);
-        }
-    }
-    *outptr = out;
-    return len;
-#if STRINGLIB_SIZEOF_CHAR > 1
-  fail:
-    *outptr = out;
-    return len - (end - in + 1);
-#endif
-}
-#undef SWAB4
-
 #endif

 #endif /* STRINGLIB_IS_UNICODE */
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5085,22 +5085,32 @@ _PyUnicode_EncodeUTF32(PyObject *str,
                       const char *errors,
                       int byteorder)
 {
-    enum PyUnicode_Kind kind;
-    const void *data;
+    int kind;
+    void *data;
    Py_ssize_t len;
    PyObject *v;
-    PY_UINT32_T *out;
+    unsigned char *p;
+    Py_ssize_t nsize, i;
+    /* Offsets from p for storing the four bytes of each code point in the
+       right order. */
 #if PY_LITTLE_ENDIAN
-    int native_ordering = byteorder <= 0;
+    int iorder[] = {0, 1, 2, 3};
 #else
-    int native_ordering = byteorder >= 0;
+    int iorder[] = {3, 2, 1, 0};
 #endif
    const char *encoding;
-    Py_ssize_t nsize, pos;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;
    PyObject *rep = NULL;

+#define STORECHAR(CH)                           \
+    do {                                        \
+        p[iorder[3]] = ((CH) >> 24) & 0xff;     \
+        p[iorder[2]] = ((CH) >> 16) & 0xff;     \
+        p[iorder[1]] = ((CH) >> 8) & 0xff;      \
+        p[iorder[0]] = (CH) & 0xff;             \
+        p += 4;                                 \
+    } while(0)
+
    if (!PyUnicode_Check(str)) {
        PyErr_BadArgument();
        return NULL;
@@ -5111,53 +5121,59 @@ _PyUnicode_EncodeUTF32(PyObject *str,
    data = PyUnicode_DATA(str);
    len = PyUnicode_GET_LENGTH(str);

-    if (len > PY_SSIZE_T_MAX / 4 - (byteorder == 0))
-        return PyErr_NoMemory();
    nsize = len + (byteorder == 0);
+    if (nsize > PY_SSIZE_T_MAX / 4)
+        return PyErr_NoMemory();
    v = PyBytes_FromStringAndSize(NULL, nsize * 4);
    if (v == NULL)
        return NULL;

-    /* output buffer is 4-bytes aligned */
-    assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 4));
-    out = (PY_UINT32_T *)PyBytes_AS_STRING(v);
+    p = (unsigned char *)PyBytes_AS_STRING(v);
    if (byteorder == 0)
-        *out++ = 0xFEFF;
+        STORECHAR(0xFEFF);
    if (len == 0)
-        goto done;
+        return v;

-    if (byteorder == -1)
+    if (byteorder == -1) {
+        /* force LE */
+        iorder[0] = 0;
+        iorder[1] = 1;
+        iorder[2] = 2;
+        iorder[3] = 3;
        encoding = "utf-32-le";
-    else if (byteorder == 1)
+    }
+    else if (byteorder == 1) {
+        /* force BE */
+        iorder[0] = 3;
+        iorder[1] = 2;
+        iorder[2] = 1;
+        iorder[3] = 0;
        encoding = "utf-32-be";
+    }
    else
        encoding = "utf-32";

    if (kind == PyUnicode_1BYTE_KIND) {
-        ucs1lib_utf32_encode((const Py_UCS1 *)data, len, &out, native_ordering);
-        goto done;
+        for (i = 0; i < len; i++)
+            STORECHAR(PyUnicode_READ(kind, data, i));
+        return v;
    }

-    pos = 0;
-    while (pos < len) {
+    for (i = 0; i < len;) {
        Py_ssize_t repsize, moreunits;
-
-        if (kind == PyUnicode_2BYTE_KIND) {
-            pos += ucs2lib_utf32_encode((const Py_UCS2 *)data + pos, len - pos,
-                                        &out, native_ordering);
-        }
-        else {
-            assert(kind == PyUnicode_4BYTE_KIND);
-            pos += ucs4lib_utf32_encode((const Py_UCS4 *)data + pos, len - pos,
-                                        &out, native_ordering);
+        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+        i++;
+        assert(ch <= MAX_UNICODE);
+        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+            STORECHAR(ch);
+            continue;
        }
-        if (pos == len)
-            break;

        rep = unicode_encode_call_errorhandler(
                errors, &errorHandler,
                encoding, "surrogates not allowed",
-                str, &exc, pos, pos + 1, &pos);
+                str, &exc, i-1, i, &i);
+
        if (!rep)
            goto error;

@@ -5165,7 +5181,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
            repsize = PyBytes_GET_SIZE(rep);
            if (repsize & 3) {
                raise_encode_exception(&exc, encoding,
-                                       str, pos - 1, pos,
+                                       str, i - 1, i,
                                       "surrogates not allowed");
                goto error;
            }
@@ -5178,7 +5194,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
            moreunits = repsize = PyUnicode_GET_LENGTH(rep);
            if (!PyUnicode_IS_ASCII(rep)) {
                raise_encode_exception(&exc, encoding,
-                                       str, pos - 1, pos,
+                                       str, i - 1, i,
                                       "surrogates not allowed");
                goto error;
            }
@@ -5186,7 +5202,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,

        /* four bytes are reserved for each surrogate */
        if (moreunits > 1) {
-            Py_ssize_t outpos = out - (PY_UINT32_T*) PyBytes_AS_STRING(v);
+            Py_ssize_t outpos = p - (unsigned char*) PyBytes_AS_STRING(v);
            Py_ssize_t morebytes = 4 * (moreunits - 1);
            if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - morebytes) {
                /* integer overflow */
@@ -5195,16 +5211,20 @@ _PyUnicode_EncodeUTF32(PyObject *str,
            }
            if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + morebytes) < 0)
                goto error;
-            out = (PY_UINT32_T*) PyBytes_AS_STRING(v) + outpos;
+            p = (unsigned char*) PyBytes_AS_STRING(v) + outpos;
        }

        if (PyBytes_Check(rep)) {
-            Py_MEMCPY(out, PyBytes_AS_STRING(rep), repsize);
-            out += moreunits;
+            Py_MEMCPY(p, PyBytes_AS_STRING(rep), repsize);
+            p += repsize;
        } else /* rep is unicode */ {
+            const Py_UCS1 *repdata;
            assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
-            ucs1lib_utf32_encode(PyUnicode_1BYTE_DATA(rep), repsize,
-                                 &out, native_ordering);
+            repdata = PyUnicode_1BYTE_DATA(rep);
+            while (repsize--) {
+                Py_UCS4 ch = *repdata++;
+                STORECHAR(ch);
+            }
        }

        Py_CLEAR(rep);
@@ -5213,12 +5233,11 @@ _PyUnicode_EncodeUTF32(PyObject *str,
    /* Cut back to the size actually needed. This is necessary, for example,
       when encoding a string containing isolated surrogates with the
       'ignore' error handler. */
-    nsize = (unsigned char*) out - (unsigned char*) PyBytes_AS_STRING(v);
+    nsize = p - (unsigned char*) PyBytes_AS_STRING(v);
    if (nsize != PyBytes_GET_SIZE(v))
      _PyBytes_Resize(&v, nsize);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
-  done:
    return v;
  error:
    Py_XDECREF(rep);
@@ -5226,6 +5245,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
    Py_XDECREF(exc);
    Py_XDECREF(v);
    return NULL;
+#undef STORECHAR
 }

 PyObject *