Issue #10156: In the interpreter's initialization phase, unicode globals

are now initialized dynamically as needed.

Issue #10156: In the interpreter's initialization phase, unicode globals
are now initialized dynamically as needed.
05997253 · Serhiy Storchaka · 5bb893c4 · 05997253 · 05997253
Commit 05997253 authored Jan 26, 2013 by Serhiy Storchaka
Hide whitespace changes
Inline Side-by-side

Showing with 48 additions and 52 deletions

Misc/NEWS Misc/NEWS +3 -0

Objects/unicodeobject.c Objects/unicodeobject.c +45 -52

No files found.
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.2.4
 Core and Builtins
 -----------------

+- Issue #10156: In the interpreter's initialization phase, unicode globals
+  are now initialized dynamically as needed.
+
 - Issue #16975: Fix error handling bug in the escape-decode bytes decoder.

 - Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -80,8 +80,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

 /* --- Globals ------------------------------------------------------------

-   The globals are initialized by the _PyUnicode_Init() API and should
-   not be used before calling that API.
+NOTE: In the interpreter's initialization phase, some globals are currently
+      initialized dynamically as needed. In the process Unicode objects may
+      be created before the Unicode type is ready.

 */

@@ -98,18 +99,30 @@ extern "C" {
   Another way to look at this is that to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
 */
-static PyObject *interned;
+static PyObject *interned = NULL;

 /* Free list for Unicode objects */
-static PyUnicodeObject *free_list;
-static int numfree;
+static PyUnicodeObject *free_list = NULL;
+static int numfree = 0;

 /* The empty Unicode object is shared to improve performance. */
-static PyUnicodeObject *unicode_empty;
+static PyUnicodeObject *unicode_empty = NULL;
+
+#define _Py_RETURN_UNICODE_EMPTY()                      \
+    do {                                                \
+        if (unicode_empty != NULL)                      \
+            Py_INCREF(unicode_empty);                   \
+        else {                                          \
+            unicode_empty = _PyUnicode_New(0);          \
+            if (unicode_empty != NULL)                  \
+                Py_INCREF(unicode_empty);               \
+        }                                               \
+        return (PyObject *)unicode_empty;               \
+    } while (0)

 /* Single character Unicode strings in the Latin-1 range are being
   shared as well. */
-static PyUnicodeObject *unicode_latin1[256];
+static PyUnicodeObject *unicode_latin1[256] = {NULL};

 /* Fast detection of the most frequent whitespace characters */
 const unsigned char _Py_ascii_whitespace[] = {
@@ -214,7 +227,7 @@ PyUnicode_GetMax(void)

 #define BLOOM_MASK unsigned long

-static BLOOM_MASK bloom_linebreak;
+static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;

 #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 #define BLOOM(mask, ch)     ((mask &  (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
@@ -479,10 +492,8 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
    if (u != NULL) {

        /* Optimization for empty strings */
-        if (size == 0 && unicode_empty != NULL) {
-            Py_INCREF(unicode_empty);
-            return (PyObject *)unicode_empty;
-        }
+        if (size == 0)
+            _Py_RETURN_UNICODE_EMPTY();

        /* Single character Unicode objects in the Latin-1 range are
           shared when using this constructor */
@@ -528,10 +539,8 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
    if (u != NULL) {

        /* Optimization for empty strings */
-        if (size == 0 && unicode_empty != NULL) {
-            Py_INCREF(unicode_empty);
-            return (PyObject *)unicode_empty;
-        }
+        if (size == 0)
+            _Py_RETURN_UNICODE_EMPTY();

        /* Single characters are shared when using this constructor.
           Restrict to ASCII, since the input must be UTF-8. */
@@ -1393,15 +1402,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,

    /* Decoding bytes objects is the most common case and should be fast */
    if (PyBytes_Check(obj)) {
-        if (PyBytes_GET_SIZE(obj) == 0) {
-            Py_INCREF(unicode_empty);
-            v = (PyObject *) unicode_empty;
-        }
-        else {
-            v = PyUnicode_Decode(
-                    PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
-                    encoding, errors);
-        }
+        if (PyBytes_GET_SIZE(obj) == 0)
+            _Py_RETURN_UNICODE_EMPTY();
+        v = PyUnicode_Decode(
+                PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
+                encoding, errors);
        return v;
    }

@@ -1421,12 +1426,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
    }

    if (buffer.len == 0) {
-        Py_INCREF(unicode_empty);
-        v = (PyObject *) unicode_empty;
+        PyBuffer_Release(&buffer);
+        _Py_RETURN_UNICODE_EMPTY();
    }
-    else
-        v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);

+    v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
    PyBuffer_Release(&buffer);
    return v;
 }
@@ -8323,10 +8327,8 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len)
    Py_ssize_t nchars;
    size_t nbytes;

-    if (len < 1) {
-        Py_INCREF(unicode_empty);
-        return (PyObject *)unicode_empty;
-    }
+    if (len < 1)
+        _Py_RETURN_UNICODE_EMPTY();

    if (len == 1 && PyUnicode_CheckExact(str)) {
        /* no repeat, return original string */
@@ -10056,8 +10058,6 @@ PyTypeObject PyUnicode_Type = {

 void _PyUnicode_Init(void)
 {
-    int i;
-
    /* XXX - move this array to unicodectype.c ? */
    Py_UNICODE linebreak[] = {
        0x000A, /* LINE FEED */
@@ -10071,14 +10071,12 @@ void _PyUnicode_Init(void)
    };

    /* Init the implementation */
-    free_list = NULL;
-    numfree = 0;
-    unicode_empty = _PyUnicode_New(0);
-    if (!unicode_empty)
-        return;
+    if (!unicode_empty) {
+        unicode_empty = _PyUnicode_New(0);
+        if (!unicode_empty)
+            return;
+    }

-    for (i = 0; i < 256; i++)
-        unicode_latin1[i] = NULL;
    if (PyType_Ready(&PyUnicode_Type) < 0)
        Py_FatalError("Can't initialize 'unicode'");

@@ -10123,15 +10121,11 @@ _PyUnicode_Fini(void)
 {
    int i;

-    Py_XDECREF(unicode_empty);
-    unicode_empty = NULL;
+    Py_CLEAR(unicode_empty);
+
+    for (i = 0; i < 256; i++)
+        Py_CLEAR(unicode_latin1[i]);

-    for (i = 0; i < 256; i++) {
-        if (unicode_latin1[i]) {
-            Py_DECREF(unicode_latin1[i]);
-            unicode_latin1[i] = NULL;
-        }
-    }
    (void)PyUnicode_ClearFreeList();
 }

@@ -10250,8 +10244,7 @@ void _Py_ReleaseInternedUnicodeStrings(void)
            "mortal/immortal\n", mortal_size, immortal_size);
    Py_DECREF(keys);
    PyDict_Clear(interned);
-    Py_DECREF(interned);
-    interned = NULL;
+    Py_CLEAR(interned);
 }