Commit 299978df authored by Antoine Pitrou

Issue #15596: Faster pickling of unicode strings.

parent 5a3c6dbe
...@@ -307,6 +307,8 @@ Core and Builtins ...@@ -307,6 +307,8 @@ Core and Builtins
Library Library
------- -------
- Issue #15596: Faster pickling of unicode strings.
- Issue #17572: Avoid chained exceptions while passing bad directives to - Issue #17572: Avoid chained exceptions while passing bad directives to
time.strptime(). Initial patch by Claudiu Popa. time.strptime(). Initial patch by Claudiu Popa.
......
...@@ -1873,24 +1873,18 @@ done: ...@@ -1873,24 +1873,18 @@ done:
} }
static int static int
save_unicode(PicklerObject *self, PyObject *obj) write_utf8(PicklerObject *self, char *data, Py_ssize_t size)
{ {
Py_ssize_t size;
PyObject *encoded = NULL;
if (self->bin) {
char pdata[5]; char pdata[5];
encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass"); #if SIZEOF_SIZE_T > 4
if (encoded == NULL) if (size > 0xffffffffUL) {
goto error; /* string too large */
size = PyBytes_GET_SIZE(encoded);
if (size > 0xffffffffL) {
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"cannot serialize a string larger than 4 GiB"); "cannot serialize a string larger than 4GB");
goto error; /* string too large */ return -1;
} }
#endif
pdata[0] = BINUNICODE; pdata[0] = BINUNICODE;
pdata[1] = (unsigned char)(size & 0xff); pdata[1] = (unsigned char)(size & 0xff);
...@@ -1898,38 +1892,78 @@ save_unicode(PicklerObject *self, PyObject *obj) ...@@ -1898,38 +1892,78 @@ save_unicode(PicklerObject *self, PyObject *obj)
pdata[3] = (unsigned char)((size >> 16) & 0xff); pdata[3] = (unsigned char)((size >> 16) & 0xff);
pdata[4] = (unsigned char)((size >> 24) & 0xff); pdata[4] = (unsigned char)((size >> 24) & 0xff);
if (_Pickler_Write(self, pdata, 5) < 0) if (_Pickler_Write(self, pdata, sizeof(pdata)) < 0)
goto error; return -1;
if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) if (_Pickler_Write(self, data, size) < 0)
goto error; return -1;
return 0;
}
/* Write the unicode object obj to the pickle stream via write_utf8().
 *
 * The UTF-8 bytes are first requested with PyUnicode_AsUTF8AndSize().  When
 * that call returns NULL, the pending error is cleared and the string is
 * re-encoded with the "surrogatepass" error handler instead.
 *
 * Returns the result of write_utf8(), or -1 when PyUnicode_READY() reports
 * a failure or the fallback encoding returns NULL.
 */
static int
write_unicode_binary(PicklerObject *self, PyObject *obj)
{
    PyObject *fallback_bytes;
    Py_ssize_t utf8_len;
    char *utf8;
    int status;

    if (PyUnicode_READY(obj) != 0) {
        return -1;
    }

    utf8 = PyUnicode_AsUTF8AndSize(obj, &utf8_len);
    if (utf8 == NULL) {
        /* Issue #8383: for strings with lone surrogates, fallback on the
           "surrogatepass" error handler. */
        PyErr_Clear();
        fallback_bytes = PyUnicode_AsEncodedString(obj, "utf-8",
                                                   "surrogatepass");
        if (fallback_bytes == NULL) {
            return -1;
        }
        status = write_utf8(self, PyBytes_AS_STRING(fallback_bytes),
                            PyBytes_GET_SIZE(fallback_bytes));
        /* The temporary bytes object is only needed for the write. */
        Py_DECREF(fallback_bytes);
        return status;
    }

    /* Common path: hand the UTF-8 buffer straight to the writer. */
    return write_utf8(self, utf8, utf8_len);
}
static int
save_unicode(PicklerObject *self, PyObject *obj)
{
if (self->bin) {
if (write_unicode_binary(self, obj) < 0)
return -1;
} }
else { else {
PyObject *encoded;
Py_ssize_t size;
const char unicode_op = UNICODE; const char unicode_op = UNICODE;
encoded = raw_unicode_escape(obj); encoded = raw_unicode_escape(obj);
if (encoded == NULL) if (encoded == NULL)
goto error; return -1;
if (_Pickler_Write(self, &unicode_op, 1) < 0) if (_Pickler_Write(self, &unicode_op, 1) < 0) {
goto error; Py_DECREF(encoded);
return -1;
}
size = PyBytes_GET_SIZE(encoded); size = PyBytes_GET_SIZE(encoded);
if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
goto error; Py_DECREF(encoded);
return -1;
}
Py_DECREF(encoded);
if (_Pickler_Write(self, "\n", 1) < 0) if (_Pickler_Write(self, "\n", 1) < 0)
goto error; return -1;
} }
if (memo_put(self, obj) < 0) if (memo_put(self, obj) < 0)
goto error; return -1;
Py_DECREF(encoded);
return 0; return 0;
error:
Py_XDECREF(encoded);
return -1;
} }
/* A helper for save_tuple. Push the len elements in tuple t on the stack. */ /* A helper for save_tuple. Push the len elements in tuple t on the stack. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment