Commit 6345be9a authored by Victor Stinner

Close #13093: PyUnicode_EncodeDecimal() no longer supports error handlers

other than "strict". The caller was unable to compute the size of the
output buffer, because that size depends on the error handler.
parent e7ede067
...@@ -1816,20 +1816,10 @@ class UnicodeTest(string_tests.CommonTest, ...@@ -1816,20 +1816,10 @@ class UnicodeTest(string_tests.CommonTest,
b' 3.14 ') b' 3.14 ')
self.assertRaises(UnicodeEncodeError, self.assertRaises(UnicodeEncodeError,
unicode_encodedecimal, "123\u20ac", "strict") unicode_encodedecimal, "123\u20ac", "strict")
self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"), self.assertRaisesRegex(
b'123?') ValueError,
self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"), "^'decimal' codec can't encode character",
b'123') unicode_encodedecimal, "123\u20ac", "replace")
self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"),
b'123&#8364;')
self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
b'123\\u20ac')
self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),
b'123? ')
self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),
b'123??')
self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),
b'123?0')
def test_transform_decimal(self): def test_transform_decimal(self):
from _testcapi import unicode_transformdecimaltoascii as transform_decimal from _testcapi import unicode_transformdecimaltoascii as transform_decimal
......
...@@ -10,6 +10,10 @@ What's New in Python 3.3 Alpha 1? ...@@ -10,6 +10,10 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #13093: PyUnicode_EncodeDecimal() no longer supports error handlers
other than "strict". The caller was unable to compute the size of the
output buffer, because that size depends on the error handler.
- PEP 3155 / issue #13448: Qualified name for classes and functions. - PEP 3155 / issue #13448: Qualified name for classes and functions.
- Issue #13436: Fix a bogus error message when an AST object was passed - Issue #13436: Fix a bogus error message when an AST object was passed
......
...@@ -8839,15 +8839,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, ...@@ -8839,15 +8839,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
char *output, char *output,
const char *errors) const char *errors)
{ {
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
PyObject *unicode; PyObject *unicode;
const char *encoding = "decimal"; Py_ssize_t i;
const char *reason = "invalid decimal Unicode string";
/* the following variable is used for caching string comparisons
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
int known_errorHandler = -1;
Py_ssize_t i, j;
enum PyUnicode_Kind kind; enum PyUnicode_Kind kind;
void *data; void *data;
...@@ -8860,15 +8853,20 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, ...@@ -8860,15 +8853,20 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
if (unicode == NULL) if (unicode == NULL)
return -1; return -1;
if (PyUnicode_READY(unicode) < 0) if (PyUnicode_READY(unicode) < 0) {
goto onError; Py_DECREF(unicode);
return -1;
}
kind = PyUnicode_KIND(unicode); kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode); data = PyUnicode_DATA(unicode);
for (i=0; i < length; ) { for (i=0; i < length; ) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i); PyObject *exc;
Py_UCS4 ch;
int decimal; int decimal;
Py_ssize_t startpos, endpos; Py_ssize_t startpos;
ch = PyUnicode_READ(kind, data, i);
if (Py_UNICODE_ISSPACE(ch)) { if (Py_UNICODE_ISSPACE(ch)) {
*output++ = ' '; *output++ = ' ';
...@@ -8886,113 +8884,20 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, ...@@ -8886,113 +8884,20 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
i++; i++;
continue; continue;
} }
/* All other characters are considered unencodable */
startpos = i;
endpos = i+1;
for (; endpos < length; endpos++) {
ch = PyUnicode_READ(kind, data, endpos);
if ((0 < ch && ch < 256) ||
Py_UNICODE_ISSPACE(ch) ||
0 <= Py_UNICODE_TODECIMAL(ch))
break;
}
/* cache callback name lookup
* (if not done yet, i.e. it's the first error) */
if (known_errorHandler==-1) {
if ((errors==NULL) || (!strcmp(errors, "strict")))
known_errorHandler = 1;
else if (!strcmp(errors, "replace"))
known_errorHandler = 2;
else if (!strcmp(errors, "ignore"))
known_errorHandler = 3;
else if (!strcmp(errors, "xmlcharrefreplace"))
known_errorHandler = 4;
else
known_errorHandler = 0;
}
switch (known_errorHandler) {
case 1: /* strict */
raise_encode_exception(&exc, encoding, unicode, startpos, endpos, reason);
goto onError;
case 2: /* replace */
for (j=startpos; j < endpos; j++)
*output++ = '?';
i = endpos;
break;
case 3: /* ignore */
i = endpos;
break;
case 4: /* xmlcharrefreplace */
/* generate replacement */
for (j=startpos; j < endpos; j++) {
ch = PyUnicode_READ(kind, data, i);
output += sprintf(output, "&#%d;", (int)ch);
i++;
}
break;
default:
{
PyObject *repunicode;
Py_ssize_t repsize, newpos, k;
enum PyUnicode_Kind repkind;
void *repdata;
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
encoding, reason, unicode, &exc,
startpos, endpos, &newpos);
if (repunicode == NULL)
goto onError;
if (!PyUnicode_Check(repunicode)) {
/* Byte results not supported, since they have no decimal property. */
PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
Py_DECREF(repunicode);
goto onError;
}
if (PyUnicode_READY(repunicode) < 0) {
Py_DECREF(repunicode);
goto onError;
}
repkind = PyUnicode_KIND(repunicode);
repdata = PyUnicode_DATA(repunicode);
/* generate replacement */ startpos = i;
repsize = PyUnicode_GET_SIZE(repunicode); exc = NULL;
for (k=0; k<repsize; k++) { raise_encode_exception(&exc, "decimal", unicode,
ch = PyUnicode_READ(repkind, repdata, k); startpos, startpos+1,
if (Py_UNICODE_ISSPACE(ch)) "invalid decimal Unicode string");
*output++ = ' '; Py_XDECREF(exc);
else { Py_DECREF(unicode);
decimal = Py_UNICODE_TODECIMAL(ch); return -1;
if (decimal >= 0)
*output++ = '0' + decimal;
else if (0 < ch && ch < 256)
*output++ = (char)ch;
else {
Py_DECREF(repunicode);
raise_encode_exception(&exc, encoding,
unicode, startpos, endpos,
reason);
goto onError;
}
}
}
i = newpos;
Py_DECREF(repunicode);
}
}
} }
/* 0-terminate the output string */ /* 0-terminate the output string */
*output++ = '\0'; *output++ = '\0';
Py_XDECREF(exc);
Py_XDECREF(errorHandler);
Py_DECREF(unicode); Py_DECREF(unicode);
return 0; return 0;
onError:
Py_XDECREF(exc);
Py_XDECREF(errorHandler);
Py_DECREF(unicode);
return -1;
} }
/* --- Helpers ------------------------------------------------------------ */ /* --- Helpers ------------------------------------------------------------ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment