Commit d3f0882d authored by Victor Stinner's avatar Victor Stinner

Issue #14744: Use the new _PyUnicodeWriter internal API to speed up str%args and str.format(args)

 * Formatting string, int, float and complex use the _PyUnicodeWriter API. It
   avoids a temporary buffer in most cases.
 * Add _PyUnicodeWriter_WriteStr() to restore the PyAccu optimization: just
   keep a reference to the string if the output is only composed of one string
 * Disable overallocation when formatting the last argument of str%args and
   str.format(args)
 * Overallocation allocates at least 100 characters: add min_length attribute
   to the _PyUnicodeWriter structure
 * Add new private functions: _PyUnicode_FastCopyCharacters(),
   _PyUnicode_FastFill() and _PyUnicode_FromASCII()

The speed up is around 20% in average.
parent a1b0c9fc
......@@ -63,10 +63,12 @@ PyAPI_FUNC(Py_complex) PyComplex_AsCComplex(PyObject *op);
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
PyAPI_FUNC(int) _PyComplex_FormatAdvancedWriter(
_PyUnicodeWriter *writer,
PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
#endif
#ifdef __cplusplus
......
......@@ -112,10 +112,12 @@ PyAPI_FUNC(int) PyFloat_ClearFreeList(void);
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
PyAPI_FUNC(int) _PyFloat_FormatAdvancedWriter(
_PyUnicodeWriter *writer,
PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
#endif /* Py_LIMITED_API */
#ifdef __cplusplus
......
......@@ -151,14 +151,22 @@ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v,
/* _PyLong_Format: Convert the long to a string object with given base,
appending a base prefix of 0[box] if base is 2, 8 or 16. */
PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base);
PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *obj, int base);
PyAPI_FUNC(int) _PyLong_FormatWriter(
_PyUnicodeWriter *writer,
PyObject *obj,
int base,
int alternate);
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter(
_PyUnicodeWriter *writer,
PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
#endif /* Py_LIMITED_API */
/* These aren't really part of the long object, but they're handy. The
......
......@@ -648,8 +648,20 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
Py_ssize_t from_start,
Py_ssize_t how_many
);
/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
may crash if parameters are invalid (e.g. if the output string
is too short). */
PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
PyObject *to,
Py_ssize_t to_start,
PyObject *from,
Py_ssize_t from_start,
Py_ssize_t how_many
);
#endif
#ifndef Py_LIMITED_API
/* Fill a string with a character: write fill_char into
unicode[start:start+length].
......@@ -658,13 +670,21 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
Return the number of written character, or return -1 and raise an exception
on error. */
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
PyObject *unicode,
Py_ssize_t start,
Py_ssize_t length,
Py_UCS4 fill_char
);
/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
if parameters are invalid (e.g. if length is longer than the string). */
PyAPI_FUNC(void) _PyUnicode_FastFill(
PyObject *unicode,
Py_ssize_t start,
Py_ssize_t length,
Py_UCS4 fill_char
);
#endif
/* Create a Unicode Object from the Py_UNICODE buffer u of the given
......@@ -696,13 +716,19 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromString(
const char *u /* UTF-8 encoded string */
);
#ifndef Py_LIMITED_API
/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
Scan the string to find the maximum character. */
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
int kind,
const void *buffer,
Py_ssize_t size);
/* Create a new string from a buffer of ASCII characters.
WARNING: Don't check if the string contains any non-ASCII character. */
PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
const char *buffer,
Py_ssize_t size);
#endif
PyAPI_FUNC(PyObject*) PyUnicode_Substring(
......@@ -864,13 +890,70 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
...
);
#ifndef Py_LIMITED_API
typedef struct {
PyObject *buffer;
void *data;
enum PyUnicode_Kind kind;
Py_UCS4 maxchar;
Py_ssize_t size;
Py_ssize_t pos;
/* minimum length of the buffer when overallocation is enabled,
see _PyUnicodeWriter_Init() */
Py_ssize_t min_length;
struct {
unsigned char overallocate:1;
/* If readonly is 1, buffer is a shared string (cannot be modified)
and size is set to 0. */
unsigned char readonly:1;
} flags;
} _PyUnicodeWriter ;
/* Initialize a Unicode writer.
If min_length is greater than zero, _PyUnicodeWriter_Prepare()
overallocates the buffer and min_length is the minimum length in characters
of the buffer. */
PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length);
/* Prepare the buffer to write 'length' characters
with the specified maximum character.
Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
(((MAXCHAR) <= (WRITER)->maxchar \
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
? 0 \
: (((LENGTH) == 0) \
? 0 \
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str);
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
#endif
#ifndef Py_LIMITED_API
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
_PyUnicodeWriter *writer,
PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
#endif
PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
......
......@@ -13,6 +13,9 @@ Core and Builtins
- Issue #14835: Make plistlib output empty arrays & dicts like OS X.
Patch by Sidney San Martín.
- Issue #14744: Use the new _PyUnicodeWriter internal API to speed up
str%args and str.format(args).
- Issue #14930: Make memoryview objects weakrefable.
- Issue #14775: Fix a potential quadratic dict build-up due to the garbage
......
......@@ -699,11 +699,22 @@ static PyObject *
complex__format__(PyObject* self, PyObject* args)
{
PyObject *format_spec;
_PyUnicodeWriter writer;
int ret;
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL;
return _PyComplex_FormatAdvanced(self, format_spec, 0,
PyUnicode_GET_LENGTH(format_spec));
return NULL;
_PyUnicodeWriter_Init(&writer, 0);
ret = _PyComplex_FormatAdvancedWriter(
&writer,
self,
format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
if (ret == -1) {
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
return _PyUnicodeWriter_Finish(&writer);
}
#if 0
......
......@@ -267,13 +267,15 @@ static PyObject *
float_repr(PyFloatObject *v)
{
PyObject *result;
char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
'r', 0,
Py_DTSF_ADD_DOT_0,
NULL);
char *buf;
buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
'r', 0,
Py_DTSF_ADD_DOT_0,
NULL);
if (!buf)
return PyErr_NoMemory();
result = PyUnicode_FromString(buf);
result = _PyUnicode_FromASCII(buf, strlen(buf));
PyMem_Free(buf);
return result;
}
......@@ -1703,11 +1705,22 @@ static PyObject *
float__format__(PyObject *self, PyObject *args)
{
PyObject *format_spec;
_PyUnicodeWriter writer;
int ret;
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL;
return _PyFloat_FormatAdvanced(self, format_spec, 0,
PyUnicode_GET_LENGTH(format_spec));
_PyUnicodeWriter_Init(&writer, 0);
ret = _PyFloat_FormatAdvancedWriter(
&writer,
self,
format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
if (ret == -1) {
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
return _PyUnicodeWriter_Finish(&writer);
}
PyDoc_STRVAR(float__format__doc,
......
This diff is collapsed.
......@@ -18,7 +18,7 @@
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
#define STRINGLIB_STR PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW unicode_fromascii
#define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN))
#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
......
......@@ -499,26 +499,26 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
int ok = 0;
PyObject *result = NULL;
PyObject *format_spec_object = NULL;
PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
Py_ssize_t len;
int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
int err;
/* If we know the type exactly, skip the lookup of __format__ and just
call the formatter directly. */
if (PyUnicode_CheckExact(fieldobj))
formatter = _PyUnicode_FormatAdvanced;
formatter = _PyUnicode_FormatAdvancedWriter;
else if (PyLong_CheckExact(fieldobj))
formatter =_PyLong_FormatAdvanced;
formatter = _PyLong_FormatAdvancedWriter;
else if (PyFloat_CheckExact(fieldobj))
formatter = _PyFloat_FormatAdvanced;
/* XXX: for 2.6, convert format_spec to the appropriate type
(unicode, str) */
formatter = _PyFloat_FormatAdvancedWriter;
else if (PyComplex_CheckExact(fieldobj))
formatter = _PyComplex_FormatAdvancedWriter;
if (formatter) {
/* we know exactly which formatter will be called when __format__ is
looked up, so call it directly, instead. */
result = formatter(fieldobj, format_spec->str,
format_spec->start, format_spec->end);
err = formatter(writer, fieldobj, format_spec->str,
format_spec->start, format_spec->end);
return (err == 0);
}
else {
/* We need to create an object out of the pointers we have, because
......@@ -536,17 +536,11 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
}
if (result == NULL)
goto done;
if (PyUnicode_READY(result) == -1)
goto done;
len = PyUnicode_GET_LENGTH(result);
if (_PyUnicodeWriter_Prepare(writer,
len, PyUnicode_MAX_CHAR_VALUE(result)) == -1)
if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
goto done;
copy_characters(writer->buffer, writer->pos,
result, 0, len);
writer->pos += len;
ok = 1;
done:
Py_XDECREF(format_spec_object);
Py_XDECREF(result);
......@@ -897,16 +891,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
if (err == -1)
return 0;
copy_characters(writer->buffer, writer->pos,
literal.str, literal.start, sublen);
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
literal.str, literal.start, sublen);
writer->pos += sublen;
}
if (field_present)
if (field_present) {
if (iter.str.start == iter.str.end)
writer->flags.overallocate = 0;
if (!output_markup(&field_name, &format_spec,
format_spec_needs_expanding, conversion, writer,
args, kwargs, recursion_depth, auto_number))
return 0;
}
}
return result;
}
......@@ -921,7 +918,7 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number)
{
_PyUnicodeWriter writer;
Py_ssize_t initlen;
Py_ssize_t minlen;
/* check the recursion level */
if (recursion_depth <= 0) {
......@@ -930,9 +927,8 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
return NULL;
}
initlen = PyUnicode_GET_LENGTH(input->str) + 100;
if (_PyUnicodeWriter_Init(&writer, initlen, 127) == -1)
return NULL;
minlen = PyUnicode_GET_LENGTH(input->str) + 100;
_PyUnicodeWriter_Init(&writer, minlen);
if (!do_markup(input, args, kwargs, &writer, recursion_depth,
auto_number)) {
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment