Commit 0030cd52 authored by Victor Stinner

Issue #25227: Clean up unicode_encode_ucs1() error handler

* Change limit type from unsigned int to Py_UCS4, to use the same type as the
  "ch" variable (a Unicode character).
* Reuse ch variable for _Py_ERROR_XMLCHARREFREPLACE
* Add some newlines for readability
parent 1e5fcc3d
...@@ -6415,7 +6415,7 @@ unicode_encode_call_errorhandler(const char *errors, ...@@ -6415,7 +6415,7 @@ unicode_encode_call_errorhandler(const char *errors,
static PyObject * static PyObject *
unicode_encode_ucs1(PyObject *unicode, unicode_encode_ucs1(PyObject *unicode,
const char *errors, const char *errors,
unsigned int limit) const Py_UCS4 limit)
{ {
/* input state */ /* input state */
Py_ssize_t pos=0, size; Py_ssize_t pos=0, size;
...@@ -6449,12 +6449,12 @@ unicode_encode_ucs1(PyObject *unicode, ...@@ -6449,12 +6449,12 @@ unicode_encode_ucs1(PyObject *unicode,
ressize = size; ressize = size;
while (pos < size) { while (pos < size) {
Py_UCS4 c = PyUnicode_READ(kind, data, pos); Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
/* can we encode this? */ /* can we encode this? */
if (c<limit) { if (ch < limit) {
/* no overflow check, because we know that the space is enough */ /* no overflow check, because we know that the space is enough */
*str++ = (char)c; *str++ = (char)ch;
++pos; ++pos;
} }
else { else {
...@@ -6481,7 +6481,7 @@ unicode_encode_ucs1(PyObject *unicode, ...@@ -6481,7 +6481,7 @@ unicode_encode_ucs1(PyObject *unicode,
case _Py_ERROR_REPLACE: case _Py_ERROR_REPLACE:
while (collstart++ < collend) while (collstart++ < collend)
*str++ = '?'; *str++ = '?';
/* fall through */ /* fall through ignore error handler */
case _Py_ERROR_IGNORE: case _Py_ERROR_IGNORE:
pos = collend; pos = collend;
break; break;
...@@ -6491,8 +6491,9 @@ unicode_encode_ucs1(PyObject *unicode, ...@@ -6491,8 +6491,9 @@ unicode_encode_ucs1(PyObject *unicode,
requiredsize = respos; requiredsize = respos;
/* determine replacement size */ /* determine replacement size */
for (i = collstart; i < collend; ++i) { for (i = collstart; i < collend; ++i) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Py_ssize_t incr; Py_ssize_t incr;
ch = PyUnicode_READ(kind, data, i);
if (ch < 10) if (ch < 10)
incr = 2+1+1; incr = 2+1+1;
else if (ch < 100) else if (ch < 100)
...@@ -6538,6 +6539,7 @@ unicode_encode_ucs1(PyObject *unicode, ...@@ -6538,6 +6539,7 @@ unicode_encode_ucs1(PyObject *unicode,
if (repunicode == NULL || (PyUnicode_Check(repunicode) && if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
PyUnicode_READY(repunicode) == -1)) PyUnicode_READY(repunicode) == -1))
goto onError; goto onError;
if (PyBytes_Check(repunicode)) { if (PyBytes_Check(repunicode)) {
/* Directly copy bytes result to output. */ /* Directly copy bytes result to output. */
repsize = PyBytes_Size(repunicode); repsize = PyBytes_Size(repunicode);
...@@ -6561,6 +6563,7 @@ unicode_encode_ucs1(PyObject *unicode, ...@@ -6561,6 +6563,7 @@ unicode_encode_ucs1(PyObject *unicode,
Py_DECREF(repunicode); Py_DECREF(repunicode);
break; break;
} }
/* need more space? (at least enough for what we /* need more space? (at least enough for what we
have+the replacement+the rest of the string, so have+the replacement+the rest of the string, so
we won't have to check space for encodable characters) */ we won't have to check space for encodable characters) */
...@@ -6583,17 +6586,18 @@ unicode_encode_ucs1(PyObject *unicode, ...@@ -6583,17 +6586,18 @@ unicode_encode_ucs1(PyObject *unicode,
str = PyBytes_AS_STRING(res) + respos; str = PyBytes_AS_STRING(res) + respos;
ressize = requiredsize; ressize = requiredsize;
} }
/* check if there is anything unencodable in the replacement /* check if there is anything unencodable in the replacement
and copy it to the output */ and copy it to the output */
for (i = 0; repsize-->0; ++i, ++str) { for (i = 0; repsize-->0; ++i, ++str) {
c = PyUnicode_READ_CHAR(repunicode, i); ch = PyUnicode_READ_CHAR(repunicode, i);
if (c >= limit) { if (ch >= limit) {
raise_encode_exception(&exc, encoding, unicode, raise_encode_exception(&exc, encoding, unicode,
pos, pos+1, reason); pos, pos+1, reason);
Py_DECREF(repunicode); Py_DECREF(repunicode);
goto onError; goto onError;
} }
*str = (char)c; *str = (char)ch;
} }
pos = newpos; pos = newpos;
Py_DECREF(repunicode); Py_DECREF(repunicode);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment