Commit ecccd288 authored by Victor Stinner

Issue #8922: Normalize the encoding name in PyUnicode_AsEncodedString() to
enable shortcuts for upper case encoding name. Add also a shortcut for
"iso-8859-1" in PyUnicode_AsEncodedString() and PyUnicode_Decode().
parent 1e2bfb77
...@@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1? ...@@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #8922: Normalize the encoding name in PyUnicode_AsEncodedString() to
enable shortcuts for upper case encoding name. Add also a shortcut for
"iso-8859-1" in PyUnicode_AsEncodedString() and PyUnicode_Decode().
- Issue #8838: Remove codecs.charbuffer_encode() function. The buffer protocol - Issue #8838: Remove codecs.charbuffer_encode() function. The buffer protocol
doesn't support "char buffer" anymore in Python3. doesn't support "char buffer" anymore in Python3.
......
...@@ -1293,25 +1293,21 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, ...@@ -1293,25 +1293,21 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
return NULL; return NULL;
} }
PyObject *PyUnicode_Decode(const char *s, /* Convert encoding to lower case and replace '_' with '-' in order to
Py_ssize_t size, catch e.g. UTF_8. Truncate the string if it is longer than lower_len-1
const char *encoding, characters. */
const char *errors) static void normalize_encoding(const char *encoding,
char *lower,
size_t lower_len)
{ {
PyObject *buffer = NULL, *unicode;
Py_buffer info;
char lower[20]; /* Enough for any encoding name we recognize */
char *l;
const char *e; const char *e;
char *l;
char *l_end;
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
/* Convert encoding to lower case and replace '_' with '-' in order to
catch e.g. UTF_8 */
e = encoding; e = encoding;
l = lower; l = lower;
while (*e && l < &lower[(sizeof lower) - 2]) { l_end = &lower[lower_len - 1];
while (*e && l < l_end) {
if (ISUPPER(*e)) { if (ISUPPER(*e)) {
*l++ = TOLOWER(*e++); *l++ = TOLOWER(*e++);
} }
...@@ -1324,8 +1320,22 @@ PyObject *PyUnicode_Decode(const char *s, ...@@ -1324,8 +1320,22 @@ PyObject *PyUnicode_Decode(const char *s,
} }
} }
*l = '\0'; *l = '\0';
}
PyObject *PyUnicode_Decode(const char *s,
Py_ssize_t size,
const char *encoding,
const char *errors)
{
PyObject *buffer = NULL, *unicode;
Py_buffer info;
char lower[11]; /* Enough for any encoding shortcut */
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
/* Shortcuts for common default encodings */ /* Shortcuts for common default encodings */
normalize_encoding(encoding, lower, sizeof(lower));
if (strcmp(lower, "utf-8") == 0) if (strcmp(lower, "utf-8") == 0)
return PyUnicode_DecodeUTF8(s, size, errors); return PyUnicode_DecodeUTF8(s, size, errors);
else if ((strcmp(lower, "latin-1") == 0) || else if ((strcmp(lower, "latin-1") == 0) ||
...@@ -1478,6 +1488,7 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode, ...@@ -1478,6 +1488,7 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
const char *errors) const char *errors)
{ {
PyObject *v; PyObject *v;
char lower[11]; /* Enough for any encoding shortcut */
if (!PyUnicode_Check(unicode)) { if (!PyUnicode_Check(unicode)) {
PyErr_BadArgument(); PyErr_BadArgument();
...@@ -1488,21 +1499,23 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode, ...@@ -1488,21 +1499,23 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
encoding = PyUnicode_GetDefaultEncoding(); encoding = PyUnicode_GetDefaultEncoding();
/* Shortcuts for common default encodings */ /* Shortcuts for common default encodings */
if (strcmp(encoding, "utf-8") == 0) normalize_encoding(encoding, lower, sizeof(lower));
if (strcmp(lower, "utf-8") == 0)
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode), PyUnicode_GET_SIZE(unicode),
errors); errors);
else if (strcmp(encoding, "latin-1") == 0) else if ((strcmp(lower, "latin-1") == 0) ||
(strcmp(lower, "iso-8859-1") == 0))
return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode), return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode), PyUnicode_GET_SIZE(unicode),
errors); errors);
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
else if (strcmp(encoding, "mbcs") == 0) else if (strcmp(lower, "mbcs") == 0)
return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode), return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode), PyUnicode_GET_SIZE(unicode),
errors); errors);
#endif #endif
else if (strcmp(encoding, "ascii") == 0) else if (strcmp(lower, "ascii") == 0)
return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode), return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode), PyUnicode_GET_SIZE(unicode),
errors); errors);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment