Commit 7581cef6 authored by Victor Stinner

Adapt the code page encoder to the new unicode_encode_call_errorhandler()

The code is not correct, but at least it doesn't crash anymore.
parent 65f51bb1
...@@ -7405,6 +7405,7 @@ error: ...@@ -7405,6 +7405,7 @@ error:
*/ */
static int static int
encode_code_page_errors(UINT code_page, PyObject **outbytes, encode_code_page_errors(UINT code_page, PyObject **outbytes,
PyObject *unicode, Py_ssize_t unicode_offset,
const Py_UNICODE *in, const int insize, const Py_UNICODE *in, const int insize,
const char* errors) const char* errors)
{ {
...@@ -7505,14 +7506,14 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes, ...@@ -7505,14 +7506,14 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
} }
charsize = Py_MAX(charsize - 1, 1); charsize = Py_MAX(charsize - 1, 1);
startpos = in - startin; startpos = unicode_offset + in - startin;
rep = unicode_encode_call_errorhandler( rep = unicode_encode_call_errorhandler(
errors, &errorHandler, encoding, reason, errors, &errorHandler, encoding, reason,
startin, insize, &exc, unicode, &exc,
startpos, startpos + charsize, &newpos); startpos, startpos + charsize, &newpos);
if (rep == NULL) if (rep == NULL)
goto error; goto error;
in = startin + newpos; in += (newpos - startpos);
if (PyBytes_Check(rep)) { if (PyBytes_Check(rep)) {
outsize = PyBytes_GET_SIZE(rep); outsize = PyBytes_GET_SIZE(rep);
...@@ -7590,6 +7591,7 @@ error: ...@@ -7590,6 +7591,7 @@ error:
*/ */
static int static int
encode_code_page_chunk(UINT code_page, PyObject **outbytes, encode_code_page_chunk(UINT code_page, PyObject **outbytes,
PyObject *unicode, Py_ssize_t unicode_offset,
const Py_UNICODE *p, int size, const Py_UNICODE *p, int size,
const char* errors) const char* errors)
{ {
...@@ -7604,45 +7606,60 @@ encode_code_page_chunk(UINT code_page, PyObject **outbytes, ...@@ -7604,45 +7606,60 @@ encode_code_page_chunk(UINT code_page, PyObject **outbytes,
return 0; return 0;
} }
done = encode_code_page_strict(code_page, outbytes, p, size, errors); done = encode_code_page_strict(code_page, outbytes,
p, size,
errors);
if (done == -2) if (done == -2)
done = encode_code_page_errors(code_page, outbytes, p, size, errors); done = encode_code_page_errors(code_page, outbytes,
unicode, unicode_offset,
p, size,
errors);
return done; return done;
} }
static PyObject * static PyObject *
encode_code_page(int code_page, encode_code_page(int code_page,
const Py_UNICODE *p, Py_ssize_t size, PyObject *unicode,
const char *errors) const char *errors)
{ {
const Py_UNICODE *p;
Py_ssize_t size;
PyObject *outbytes = NULL; PyObject *outbytes = NULL;
int ret; Py_ssize_t offset;
int chunk_len, ret;
p = PyUnicode_AsUnicodeAndSize(unicode, &size);
if (p == NULL)
return NULL;
if (code_page < 0) { if (code_page < 0) {
PyErr_SetString(PyExc_ValueError, "invalid code page number"); PyErr_SetString(PyExc_ValueError, "invalid code page number");
return NULL; return NULL;
} }
offset = 0;
do
{
#ifdef NEED_RETRY #ifdef NEED_RETRY
retry: if (size > INT_MAX)
if (size > INT_MAX) chunk_len = INT_MAX;
ret = encode_code_page_chunk(code_page, &outbytes, p, INT_MAX, errors); else
else
#endif #endif
ret = encode_code_page_chunk(code_page, &outbytes, p, (int)size, errors); chunk_len = (int)size;
ret = encode_code_page_chunk(code_page, &outbytes,
if (ret < 0) { unicode, offset,
Py_XDECREF(outbytes); p, chunk_len,
return NULL; errors);
}
if (ret < 0) {
Py_XDECREF(outbytes);
return NULL;
}
#ifdef NEED_RETRY p += chunk_len;
if (size > INT_MAX) { offset += chunk_len;
p += INT_MAX; size -= chunk_len;
size -= INT_MAX; } while (size != 0);
goto retry;
}
#endif
return outbytes; return outbytes;
} }
...@@ -7652,7 +7669,13 @@ PyUnicode_EncodeMBCS(const Py_UNICODE *p, ...@@ -7652,7 +7669,13 @@ PyUnicode_EncodeMBCS(const Py_UNICODE *p,
Py_ssize_t size, Py_ssize_t size,
const char *errors) const char *errors)
{ {
return encode_code_page(CP_ACP, p, size, errors); PyObject *unicode, *res;
unicode = PyUnicode_FromUnicode(p, size);
if (unicode == NULL)
return NULL;
res = encode_code_page(CP_ACP, unicode, errors);
Py_DECREF(unicode);
return res;
} }
PyObject * PyObject *
...@@ -7660,12 +7683,7 @@ PyUnicode_EncodeCodePage(int code_page, ...@@ -7660,12 +7683,7 @@ PyUnicode_EncodeCodePage(int code_page,
PyObject *unicode, PyObject *unicode,
const char *errors) const char *errors)
{ {
const Py_UNICODE *p; return encode_code_page(code_page, unicode, errors);
Py_ssize_t size;
p = PyUnicode_AsUnicodeAndSize(unicode, &size);
if (p == NULL)
return NULL;
return encode_code_page(code_page, p, size, errors);
} }
PyObject * PyObject *
...@@ -7675,9 +7693,7 @@ PyUnicode_AsMBCSString(PyObject *unicode) ...@@ -7675,9 +7693,7 @@ PyUnicode_AsMBCSString(PyObject *unicode)
PyErr_BadArgument(); PyErr_BadArgument();
return NULL; return NULL;
} }
return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode), return PyUnicode_EncodeCodePage(CP_ACP, unicode, NULL);
PyUnicode_GET_SIZE(unicode),
NULL);
} }
#undef NEED_RETRY #undef NEED_RETRY
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment