Commit 1b4f9cec authored by Victor Stinner

Create _PyUnicode_READY_REPLACE() to reuse singleton

Only use _PyUnicode_READY_REPLACE() on just created strings.
parent c379ead9
...@@ -130,6 +130,11 @@ extern "C" { ...@@ -130,6 +130,11 @@ extern "C" {
(PyUnicode_IS_READY(op) ? \ (PyUnicode_IS_READY(op) ? \
0 : _PyUnicode_Ready((PyObject *)(op)))) 0 : _PyUnicode_Ready((PyObject *)(op))))
/* Ready the string referenced by *p_obj, allowing it to be replaced by a
   shared singleton.

   p_obj is a pointer to the object pointer (it is cast to PyObject **
   before being handed to _PyUnicode_ReadyReplace()).  If the string is
   already ready the macro evaluates to 0 without calling anything;
   otherwise it evaluates to the result of _PyUnicode_ReadyReplace(), which
   may store a different object into *p_obj.  Callers must therefore
   re-read *p_obj afterwards.

   Every use of the parameter is parenthesized so that an expression
   argument (e.g. `array + 1`) is dereferenced as a whole.  Note that the
   argument is still evaluated more than once, so it must not have side
   effects. */
#define _PyUnicode_READY_REPLACE(p_obj)                         \
    (assert(_PyUnicode_CHECK(*(p_obj))),                        \
     (PyUnicode_IS_READY(*(p_obj)) ?                            \
      0 : _PyUnicode_ReadyReplace((PyObject **)(p_obj))))
#define _PyUnicode_SHARE_UTF8(op) \ #define _PyUnicode_SHARE_UTF8(op) \
(assert(_PyUnicode_CHECK(op)), \ (assert(_PyUnicode_CHECK(op)), \
assert(!PyUnicode_IS_COMPACT_ASCII(op)), \ assert(!PyUnicode_IS_COMPACT_ASCII(op)), \
...@@ -212,7 +217,9 @@ const unsigned char _Py_ascii_whitespace[] = { ...@@ -212,7 +217,9 @@ const unsigned char _Py_ascii_whitespace[] = {
0, 0, 0, 0, 0, 0, 0, 0 0, 0, 0, 0, 0, 0, 0, 0
}; };
/* forward */
static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
static PyObject* get_latin1_char(unsigned char ch);
static PyObject * static PyObject *
unicode_encode_call_errorhandler(const char *errors, unicode_encode_call_errorhandler(const char *errors,
...@@ -1034,10 +1041,10 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, ...@@ -1034,10 +1041,10 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
int unicode_ready_calls = 0; int unicode_ready_calls = 0;
#endif #endif
int static int
_PyUnicode_Ready(PyObject *obj) unicode_ready(PyObject **p_obj, int replace)
{ {
PyUnicodeObject *unicode = (PyUnicodeObject *)obj; PyUnicodeObject *unicode;
wchar_t *end; wchar_t *end;
Py_UCS4 maxchar = 0; Py_UCS4 maxchar = 0;
Py_ssize_t num_surrogates; Py_ssize_t num_surrogates;
...@@ -1045,6 +1052,9 @@ _PyUnicode_Ready(PyObject *obj) ...@@ -1045,6 +1052,9 @@ _PyUnicode_Ready(PyObject *obj)
Py_ssize_t length_wo_surrogates; Py_ssize_t length_wo_surrogates;
#endif #endif
assert(p_obj != NULL);
unicode = (PyUnicodeObject *)*p_obj;
/* _PyUnicode_Ready() is only intended for old-style API usage where /* _PyUnicode_Ready() is only intended for old-style API usage where
strings were created using _PyObject_New() and where no canonical strings were created using _PyObject_New() and where no canonical
representation (the str field) has been set yet aka strings representation (the str field) has been set yet aka strings
...@@ -1061,6 +1071,32 @@ _PyUnicode_Ready(PyObject *obj) ...@@ -1061,6 +1071,32 @@ _PyUnicode_Ready(PyObject *obj)
++unicode_ready_calls; ++unicode_ready_calls;
#endif #endif
#ifdef Py_DEBUG
assert(!replace || Py_REFCNT(unicode) == 1);
#else
if (replace && Py_REFCNT(unicode) != 1)
replace = 0;
#endif
if (replace) {
Py_ssize_t len = _PyUnicode_WSTR_LENGTH(unicode);
wchar_t *wstr = _PyUnicode_WSTR(unicode);
/* Optimization for empty strings */
if (len == 0) {
Py_INCREF(unicode_empty);
Py_DECREF(*p_obj);
*p_obj = unicode_empty;
return 0;
}
if (len == 1 && wstr[0] < 256) {
PyObject *latin1_char = get_latin1_char((unsigned char)wstr[0]);
if (latin1_char == NULL)
return -1;
Py_DECREF(*p_obj);
*p_obj = latin1_char;
return 0;
}
}
end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode); end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode);
if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end, if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end,
&maxchar, &num_surrogates) == -1) &maxchar, &num_surrogates) == -1)
...@@ -1161,6 +1197,18 @@ _PyUnicode_Ready(PyObject *obj) ...@@ -1161,6 +1197,18 @@ _PyUnicode_Ready(PyObject *obj)
return 0; return 0;
} }
/* Ready the string referenced by *op, with singleton replacement enabled.

   Forwards to unicode_ready() with replace=1.  In that mode, when the
   string holds the only reference, an empty string is swapped for
   unicode_empty and a one-character string whose code point is below 256
   is swapped for the object returned by get_latin1_char(); the reference
   previously stored in *op is released and *op is updated.  Callers must
   re-read *op after the call.

   Returns the status reported by unicode_ready(): 0 on success, -1 on
   failure. */
int
_PyUnicode_ReadyReplace(PyObject **op)
{
return unicode_ready(op, 1);
}
/* Ready the string `op` in place, without singleton replacement.

   Forwards to unicode_ready() with replace=0.  The address passed is that
   of the local parameter copy; with replace disabled the replacement
   branch of unicode_ready() is skipped, so the caller's pointer is not
   rebound by that branch.

   Returns the status reported by unicode_ready(). */
int
_PyUnicode_Ready(PyObject *op)
{
return unicode_ready(&op, 0);
}
static void static void
unicode_dealloc(register PyUnicodeObject *unicode) unicode_dealloc(register PyUnicodeObject *unicode)
{ {
...@@ -2524,7 +2572,7 @@ PyUnicode_Decode(const char *s, ...@@ -2524,7 +2572,7 @@ PyUnicode_Decode(const char *s,
goto onError; goto onError;
} }
Py_DECREF(buffer); Py_DECREF(buffer);
if (PyUnicode_READY(unicode)) { if (_PyUnicode_READY_REPLACE(&unicode)) {
Py_DECREF(unicode); Py_DECREF(unicode);
return NULL; return NULL;
} }
...@@ -3573,7 +3621,7 @@ utf7Error: ...@@ -3573,7 +3621,7 @@ utf7Error:
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
if (PyUnicode_READY(unicode) == -1) { if (_PyUnicode_READY_REPLACE(&unicode)) {
Py_DECREF(unicode); Py_DECREF(unicode);
return NULL; return NULL;
} }
...@@ -4137,14 +4185,13 @@ PyUnicode_DecodeUTF8Stateful(const char *s, ...@@ -4137,14 +4185,13 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
/* Adjust length and ready string when it contained errors and /* Adjust length and ready string when it contained errors and
is of the old resizable kind. */ is of the old resizable kind. */
if (kind == PyUnicode_WCHAR_KIND) { if (kind == PyUnicode_WCHAR_KIND) {
if (PyUnicode_Resize((PyObject**)&unicode, i) < 0 || if (PyUnicode_Resize((PyObject**)&unicode, i) < 0)
PyUnicode_READY(unicode) == -1)
goto onError; goto onError;
} }
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
if (PyUnicode_READY(unicode) == -1) { if (_PyUnicode_READY_REPLACE(&unicode)) {
Py_DECREF(unicode); Py_DECREF(unicode);
return NULL; return NULL;
} }
...@@ -4647,7 +4694,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, ...@@ -4647,7 +4694,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
if (PyUnicode_READY(unicode) == -1) { if (_PyUnicode_READY_REPLACE(&unicode)) {
Py_DECREF(unicode); Py_DECREF(unicode);
return NULL; return NULL;
} }
...@@ -5045,7 +5092,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s, ...@@ -5045,7 +5092,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
if (PyUnicode_READY(unicode) == -1) { if (_PyUnicode_READY_REPLACE(&unicode)) {
Py_DECREF(unicode); Py_DECREF(unicode);
return NULL; return NULL;
} }
...@@ -5501,11 +5548,13 @@ PyUnicode_DecodeUnicodeEscape(const char *s, ...@@ -5501,11 +5548,13 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
{ {
if (PyUnicode_Resize((PyObject**)&v, i) < 0) if (PyUnicode_Resize((PyObject**)&v, i) < 0)
goto onError; goto onError;
if (PyUnicode_READY(v) == -1)
goto onError;
} }
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
if (_PyUnicode_READY_REPLACE(&v)) {
Py_DECREF(v);
return NULL;
}
return (PyObject *)v; return (PyObject *)v;
ucnhashError: ucnhashError:
...@@ -5803,7 +5852,7 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, ...@@ -5803,7 +5852,7 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
goto onError; goto onError;
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
if (PyUnicode_READY(v) == -1) { if (_PyUnicode_READY_REPLACE(&v)) {
Py_DECREF(v); Py_DECREF(v);
return NULL; return NULL;
} }
...@@ -5991,7 +6040,7 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, ...@@ -5991,7 +6040,7 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
goto onError; goto onError;
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
if (PyUnicode_READY(v) == -1) { if (_PyUnicode_READY_REPLACE(&v)) {
Py_DECREF(v); Py_DECREF(v);
return NULL; return NULL;
} }
...@@ -6417,7 +6466,7 @@ PyUnicode_DecodeASCII(const char *s, ...@@ -6417,7 +6466,7 @@ PyUnicode_DecodeASCII(const char *s,
goto onError; goto onError;
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
if (PyUnicode_READY(v) == -1) { if (_PyUnicode_READY_REPLACE(&v)) {
Py_DECREF(v); Py_DECREF(v);
return NULL; return NULL;
} }
...@@ -6611,7 +6660,7 @@ PyUnicode_DecodeMBCSStateful(const char *s, ...@@ -6611,7 +6660,7 @@ PyUnicode_DecodeMBCSStateful(const char *s,
goto retry; goto retry;
} }
#endif #endif
if (PyUnicode_READY(v) == -1) { if (_PyUnicode_READY_REPLACE(&v)) {
Py_DECREF(v); Py_DECREF(v);
return NULL; return NULL;
} }
...@@ -6910,7 +6959,7 @@ PyUnicode_DecodeCharmap(const char *s, ...@@ -6910,7 +6959,7 @@ PyUnicode_DecodeCharmap(const char *s,
goto onError; goto onError;
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
if (PyUnicode_READY(v) == -1) { if (_PyUnicode_READY_REPLACE(&v)) {
Py_DECREF(v); Py_DECREF(v);
return NULL; return NULL;
} }
...@@ -7816,7 +7865,7 @@ _PyUnicode_TranslateCharmap(PyObject *input, ...@@ -7816,7 +7865,7 @@ _PyUnicode_TranslateCharmap(PyObject *input,
repunicode = unicode_translate_call_errorhandler(errors, &errorHandler, repunicode = unicode_translate_call_errorhandler(errors, &errorHandler,
reason, input, &exc, reason, input, &exc,
collstart, collend, &newpos); collstart, collend, &newpos);
if (repunicode == NULL || PyUnicode_READY(repunicode) == -1) if (repunicode == NULL || _PyUnicode_READY_REPLACE(&repunicode))
goto onError; goto onError;
/* generate replacement */ /* generate replacement */
repsize = PyUnicode_GET_LENGTH(repunicode); repsize = PyUnicode_GET_LENGTH(repunicode);
...@@ -8793,7 +8842,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) ...@@ -8793,7 +8842,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
Py_TYPE(separator)->tp_name); Py_TYPE(separator)->tp_name);
goto onError; goto onError;
} }
if (PyUnicode_READY(separator) == -1) if (PyUnicode_READY(separator))
goto onError; goto onError;
sep = separator; sep = separator;
seplen = PyUnicode_GET_LENGTH(separator); seplen = PyUnicode_GET_LENGTH(separator);
...@@ -10126,7 +10175,7 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args) ...@@ -10126,7 +10175,7 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
j = 0; j = 0;
} }
if (PyUnicode_READY(u) == -1) { if (_PyUnicode_READY_REPLACE(&u)) {
Py_DECREF(u); Py_DECREF(u);
return NULL; return NULL;
} }
...@@ -12781,7 +12830,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) ...@@ -12781,7 +12830,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (unicode == NULL) if (unicode == NULL)
return NULL; return NULL;
assert(_PyUnicode_CHECK(unicode)); assert(_PyUnicode_CHECK(unicode));
if (PyUnicode_READY(unicode)) if (_PyUnicode_READY_REPLACE(&unicode))
return NULL; return NULL;
self = (PyUnicodeObject *) type->tp_alloc(type, 0); self = (PyUnicodeObject *) type->tp_alloc(type, 0);
...@@ -12988,10 +13037,11 @@ PyUnicode_InternInPlace(PyObject **p) ...@@ -12988,10 +13037,11 @@ PyUnicode_InternInPlace(PyObject **p)
return; return;
if (PyUnicode_CHECK_INTERNED(s)) if (PyUnicode_CHECK_INTERNED(s))
return; return;
if (PyUnicode_READY(s) == -1) { if (_PyUnicode_READY_REPLACE(p)) {
assert(0 && "PyUnicode_READY fail in PyUnicode_InternInPlace"); assert(0 && "PyUnicode_READY fail in PyUnicode_InternInPlace");
return; return;
} }
s = (PyUnicodeObject *)(*p);
if (interned == NULL) { if (interned == NULL) {
interned = PyDict_New(); interned = PyDict_New();
if (interned == NULL) { if (interned == NULL) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment