Commit 05997253 authored by Serhiy Storchaka

Issue #10156: In the interpreter's initialization phase, unicode globals
are now initialized dynamically as needed.
parent 5bb893c4
...@@ -10,6 +10,9 @@ What's New in Python 3.2.4 ...@@ -10,6 +10,9 @@ What's New in Python 3.2.4
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #10156: In the interpreter's initialization phase, unicode globals
are now initialized dynamically as needed.
- Issue #16975: Fix error handling bug in the escape-decode bytes decoder. - Issue #16975: Fix error handling bug in the escape-decode bytes decoder.
- Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping" - Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
......
...@@ -80,8 +80,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ...@@ -80,8 +80,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/* --- Globals ------------------------------------------------------------ /* --- Globals ------------------------------------------------------------
The globals are initialized by the _PyUnicode_Init() API and should NOTE: In the interpreter's initialization phase, some globals are currently
not be used before calling that API. initialized dynamically as needed. In the process Unicode objects may
be created before the Unicode type is ready.
*/ */
...@@ -98,18 +99,30 @@ extern "C" { ...@@ -98,18 +99,30 @@ extern "C" {
Another way to look at this is that to say that the actual reference Another way to look at this is that to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0) count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/ */
static PyObject *interned; static PyObject *interned = NULL;
/* Free list for Unicode objects */ /* Free list for Unicode objects */
static PyUnicodeObject *free_list; static PyUnicodeObject *free_list = NULL;
static int numfree; static int numfree = 0;
/* The empty Unicode object is shared to improve performance. */ /* The empty Unicode object is shared to improve performance. */
static PyUnicodeObject *unicode_empty; static PyUnicodeObject *unicode_empty = NULL;
/* Return the shared empty Unicode object from the enclosing function.

   If unicode_empty has not been created yet, it is created on demand with
   _PyUnicode_New(0).  When the object exists (or was just created), its
   reference count is incremented before it is returned.  If
   _PyUnicode_New(0) returns NULL, unicode_empty stays NULL and NULL is
   returned.

   The expansion contains a `return` statement, so it leaves the calling
   function; the returned value is cast to PyObject *. */
#define _Py_RETURN_UNICODE_EMPTY()                      \
    do {                                                \
        if (unicode_empty == NULL)                      \
            unicode_empty = _PyUnicode_New(0);          \
        if (unicode_empty != NULL)                      \
            Py_INCREF(unicode_empty);                   \
        return (PyObject *)unicode_empty;               \
    } while (0)
/* Single character Unicode strings in the Latin-1 range are being /* Single character Unicode strings in the Latin-1 range are being
shared as well. */ shared as well. */
static PyUnicodeObject *unicode_latin1[256]; static PyUnicodeObject *unicode_latin1[256] = {NULL};
/* Fast detection of the most frequent whitespace characters */ /* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace[] = { const unsigned char _Py_ascii_whitespace[] = {
...@@ -214,7 +227,7 @@ PyUnicode_GetMax(void) ...@@ -214,7 +227,7 @@ PyUnicode_GetMax(void)
#define BLOOM_MASK unsigned long #define BLOOM_MASK unsigned long
static BLOOM_MASK bloom_linebreak; static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1))))) #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1))))) #define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
...@@ -479,10 +492,8 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u, ...@@ -479,10 +492,8 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
if (u != NULL) { if (u != NULL) {
/* Optimization for empty strings */ /* Optimization for empty strings */
if (size == 0 && unicode_empty != NULL) { if (size == 0)
Py_INCREF(unicode_empty); _Py_RETURN_UNICODE_EMPTY();
return (PyObject *)unicode_empty;
}
/* Single character Unicode objects in the Latin-1 range are /* Single character Unicode objects in the Latin-1 range are
shared when using this constructor */ shared when using this constructor */
...@@ -528,10 +539,8 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) ...@@ -528,10 +539,8 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
if (u != NULL) { if (u != NULL) {
/* Optimization for empty strings */ /* Optimization for empty strings */
if (size == 0 && unicode_empty != NULL) { if (size == 0)
Py_INCREF(unicode_empty); _Py_RETURN_UNICODE_EMPTY();
return (PyObject *)unicode_empty;
}
/* Single characters are shared when using this constructor. /* Single characters are shared when using this constructor.
Restrict to ASCII, since the input must be UTF-8. */ Restrict to ASCII, since the input must be UTF-8. */
...@@ -1393,15 +1402,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, ...@@ -1393,15 +1402,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
/* Decoding bytes objects is the most common case and should be fast */ /* Decoding bytes objects is the most common case and should be fast */
if (PyBytes_Check(obj)) { if (PyBytes_Check(obj)) {
if (PyBytes_GET_SIZE(obj) == 0) { if (PyBytes_GET_SIZE(obj) == 0)
Py_INCREF(unicode_empty); _Py_RETURN_UNICODE_EMPTY();
v = (PyObject *) unicode_empty;
}
else {
v = PyUnicode_Decode( v = PyUnicode_Decode(
PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
encoding, errors); encoding, errors);
}
return v; return v;
} }
...@@ -1421,12 +1426,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, ...@@ -1421,12 +1426,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
} }
if (buffer.len == 0) { if (buffer.len == 0) {
Py_INCREF(unicode_empty); PyBuffer_Release(&buffer);
v = (PyObject *) unicode_empty; _Py_RETURN_UNICODE_EMPTY();
} }
else
v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
PyBuffer_Release(&buffer); PyBuffer_Release(&buffer);
return v; return v;
} }
...@@ -8323,10 +8327,8 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len) ...@@ -8323,10 +8327,8 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len)
Py_ssize_t nchars; Py_ssize_t nchars;
size_t nbytes; size_t nbytes;
if (len < 1) { if (len < 1)
Py_INCREF(unicode_empty); _Py_RETURN_UNICODE_EMPTY();
return (PyObject *)unicode_empty;
}
if (len == 1 && PyUnicode_CheckExact(str)) { if (len == 1 && PyUnicode_CheckExact(str)) {
/* no repeat, return original string */ /* no repeat, return original string */
...@@ -10056,8 +10058,6 @@ PyTypeObject PyUnicode_Type = { ...@@ -10056,8 +10058,6 @@ PyTypeObject PyUnicode_Type = {
void _PyUnicode_Init(void) void _PyUnicode_Init(void)
{ {
int i;
/* XXX - move this array to unicodectype.c ? */ /* XXX - move this array to unicodectype.c ? */
Py_UNICODE linebreak[] = { Py_UNICODE linebreak[] = {
0x000A, /* LINE FEED */ 0x000A, /* LINE FEED */
...@@ -10071,14 +10071,12 @@ void _PyUnicode_Init(void) ...@@ -10071,14 +10071,12 @@ void _PyUnicode_Init(void)
}; };
/* Init the implementation */ /* Init the implementation */
free_list = NULL; if (!unicode_empty) {
numfree = 0;
unicode_empty = _PyUnicode_New(0); unicode_empty = _PyUnicode_New(0);
if (!unicode_empty) if (!unicode_empty)
return; return;
}
for (i = 0; i < 256; i++)
unicode_latin1[i] = NULL;
if (PyType_Ready(&PyUnicode_Type) < 0) if (PyType_Ready(&PyUnicode_Type) < 0)
Py_FatalError("Can't initialize 'unicode'"); Py_FatalError("Can't initialize 'unicode'");
...@@ -10123,15 +10121,11 @@ _PyUnicode_Fini(void) ...@@ -10123,15 +10121,11 @@ _PyUnicode_Fini(void)
{ {
int i; int i;
Py_XDECREF(unicode_empty); Py_CLEAR(unicode_empty);
unicode_empty = NULL;
for (i = 0; i < 256; i++)
Py_CLEAR(unicode_latin1[i]);
for (i = 0; i < 256; i++) {
if (unicode_latin1[i]) {
Py_DECREF(unicode_latin1[i]);
unicode_latin1[i] = NULL;
}
}
(void)PyUnicode_ClearFreeList(); (void)PyUnicode_ClearFreeList();
} }
...@@ -10250,8 +10244,7 @@ void _Py_ReleaseInternedUnicodeStrings(void) ...@@ -10250,8 +10244,7 @@ void _Py_ReleaseInternedUnicodeStrings(void)
"mortal/immortal\n", mortal_size, immortal_size); "mortal/immortal\n", mortal_size, immortal_size);
Py_DECREF(keys); Py_DECREF(keys);
PyDict_Clear(interned); PyDict_Clear(interned);
Py_DECREF(interned); Py_CLEAR(interned);
interned = NULL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment