Commit a5c68c3c authored by Victor Stinner

Issue #8923: cache str.encode() result

When a string is encoded to UTF-8 in strict mode, the result is cached into the
object. Examples: str.encode(), str.encode('utf-8'), PyUnicode_AsUTF8String()
and PyUnicode_AsEncodedString(unicode, "utf-8", NULL).
parent f3fd733f
......@@ -10,6 +10,11 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins
-----------------
- Issue #8923: When a string is encoded to UTF-8 in strict mode, the result is
cached into the object. Examples: str.encode(), str.encode('utf-8'),
PyUnicode_AsUTF8String() and PyUnicode_AsEncodedString(unicode, "utf-8",
NULL).
- Issue #10831: PyUnicode_FromFormat() supports %li, %lli and %zi formats.
- Issue #10829: Refactor PyUnicode_FromFormat(), use the same function to parse
......
......@@ -1710,17 +1710,21 @@ PyUnicode_AsEncodedString(PyObject *unicode,
}
if (encoding == NULL)
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
errors);
return PyUnicode_AsUTF8String(unicode);
/* Shortcuts for common default encodings */
if (normalize_encoding(encoding, lower, sizeof(lower))) {
if ((strcmp(lower, "utf-8") == 0) ||
(strcmp(lower, "utf8") == 0))
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
errors);
{
if (errors == NULL || strcmp(errors, "strict") == 0) {
return PyUnicode_AsUTF8String(unicode);
} else {
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
errors);
}
}
else if ((strcmp(lower, "latin-1") == 0) ||
(strcmp(lower, "latin1") == 0) ||
(strcmp(lower, "iso-8859-1") == 0))
......@@ -3077,13 +3081,16 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
/* PyUnicode_AsUTF8String() as rendered in this commit's diff view.
   The +/- markers are not shown, so lines the commit removes and lines it
   adds appear together in one body. Read literally as C, everything after
   the first `return PyUnicode_EncodeUTF8(...)` would be unreachable.
   Returns a PyObject pointer, or NULL on the failure paths below. */
PyObject *
PyUnicode_AsUTF8String(PyObject *unicode)
{
PyObject *utf8;
/* Arguments that fail PyUnicode_Check() are rejected: PyErr_BadArgument()
   is called and NULL is returned. */
if (!PyUnicode_Check(unicode)) {
PyErr_BadArgument();
return NULL;
}
/* NOTE(review): presumably the removed (pre-commit) side of the hunk --
   it calls the encoder directly on every call, passing NULL as the
   errors argument. Confirm against the raw diff. */
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
NULL);
/* NOTE(review): presumably the added (post-commit) side of the hunk --
   the result now comes from _PyUnicode_AsDefaultEncodedString(), which
   per the commit message is where the cached UTF-8 form is obtained.
   Confirm against the raw diff. */
utf8 = _PyUnicode_AsDefaultEncodedString(unicode);
if (utf8 == NULL)
return NULL;
/* The reference count is incremented before the object is returned.
   NOTE(review): this suggests the helper returns a borrowed reference to
   an object stored on `unicode` -- verify in the helper's definition. */
Py_INCREF(utf8);
return utf8;
}
/* --- UTF-32 Codec ------------------------------------------------------- */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment