Commit ee4544c9 authored by Victor Stinner

Issue #14744: Inline unicode_writer_write_char() and unicode_write_str()

Also optimize PyUnicode_Format(): call unicode_writer_prepare() only once
per argument.
parent c1fdad3e
...@@ -500,6 +500,7 @@ render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *write ...@@ -500,6 +500,7 @@ render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *write
PyObject *result = NULL; PyObject *result = NULL;
PyObject *format_spec_object = NULL; PyObject *format_spec_object = NULL;
PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
Py_ssize_t len;
/* If we know the type exactly, skip the lookup of __format__ and just /* If we know the type exactly, skip the lookup of __format__ and just
call the formatter directly. */ call the formatter directly. */
...@@ -533,12 +534,19 @@ render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *write ...@@ -533,12 +534,19 @@ render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *write
result = PyObject_Format(fieldobj, format_spec_object); result = PyObject_Format(fieldobj, format_spec_object);
} }
if (result == NULL || PyUnicode_READY(result) == -1) if (result == NULL)
goto done;
if (PyUnicode_READY(result) == -1)
goto done; goto done;
assert(PyUnicode_Check(result)); len = PyUnicode_GET_LENGTH(result);
if (unicode_writer_prepare(writer,
ok = (unicode_writer_write_str(writer, result, 0, PyUnicode_GET_LENGTH(result)) == 0); len, PyUnicode_MAX_CHAR_VALUE(result)) == -1)
goto done;
copy_characters(writer->buffer, writer->pos,
result, 0, len);
writer->pos += len;
ok = 1;
done: done:
Py_XDECREF(format_spec_object); Py_XDECREF(format_spec_object);
Py_XDECREF(result); Py_XDECREF(result);
...@@ -873,7 +881,8 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, ...@@ -873,7 +881,8 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
SubString literal; SubString literal;
SubString field_name; SubString field_name;
SubString format_spec; SubString format_spec;
Py_UCS4 conversion; Py_UCS4 conversion, maxchar;
Py_ssize_t sublen;
int err; int err;
MarkupIterator_init(&iter, input->str, input->start, input->end); MarkupIterator_init(&iter, input->str, input->start, input->end);
...@@ -881,11 +890,18 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, ...@@ -881,11 +890,18 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
&field_name, &format_spec, &field_name, &format_spec,
&conversion, &conversion,
&format_spec_needs_expanding)) == 2) { &format_spec_needs_expanding)) == 2) {
err = unicode_writer_write_str(writer, sublen = literal.end - literal.start;
literal.str, literal.start, if (sublen) {
literal.end - literal.start); maxchar = _PyUnicode_FindMaxChar(literal.str,
if (err == -1) literal.start, literal.end);
return 0; err = unicode_writer_prepare(writer, sublen, maxchar);
if (err == -1)
return 0;
copy_characters(writer->buffer, writer->pos,
literal.str, literal.start, sublen);
writer->pos += sublen;
}
if (field_present) if (field_present)
if (!output_markup(&field_name, &format_spec, if (!output_markup(&field_name, &format_spec,
format_spec_needs_expanding, conversion, writer, format_spec_needs_expanding, conversion, writer,
......
...@@ -1150,14 +1150,15 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, ...@@ -1150,14 +1150,15 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
void *from_data, *to_data; void *from_data, *to_data;
int fast; int fast;
assert(0 <= how_many);
assert(0 <= from_start);
assert(0 <= to_start);
assert(PyUnicode_Check(from)); assert(PyUnicode_Check(from));
assert(PyUnicode_Check(to)); assert(PyUnicode_Check(to));
assert(PyUnicode_IS_READY(from)); assert(PyUnicode_IS_READY(from));
assert(PyUnicode_IS_READY(to)); assert(PyUnicode_IS_READY(to));
assert(from_start + how_many <= PyUnicode_GET_LENGTH(from));
assert(PyUnicode_GET_LENGTH(from) >= how_many);
assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
assert(0 <= how_many);
if (how_many == 0) if (how_many == 0)
return 0; return 0;
...@@ -13271,48 +13272,6 @@ unicode_writer_prepare(unicode_writer_t *writer, ...@@ -13271,48 +13272,6 @@ unicode_writer_prepare(unicode_writer_t *writer,
return 0; return 0;
} }
/* Copy the `length` characters of `str` that begin at index `start` into
   the writer's buffer at the writer's current position, then advance that
   position by `length`.

   Returns 0 on success (including the trivial case length == 0, where the
   writer is left untouched) and -1 when PyUnicode_READY() or
   unicode_writer_prepare() reports failure.

   NOTE(review): unicode_writer_prepare() is presumably what guarantees the
   buffer has room for `length` more characters up to the given maximum
   code point -- confirm against its definition. */
Py_LOCAL_INLINE(int)
unicode_writer_write_str(
    unicode_writer_t *writer,
    PyObject *str, Py_ssize_t start, Py_ssize_t length)
{
    Py_UCS4 widest;
    Py_ssize_t stop;

    assert(str != NULL);
    assert(PyUnicode_Check(str));
    if (PyUnicode_READY(str) == -1) {
        return -1;
    }

    /* The requested slice must lie entirely inside the source string. */
    assert(0 <= start);
    assert(0 <= length);
    stop = start + length;
    assert(stop <= PyUnicode_GET_LENGTH(str));

    if (length != 0) {
        /* Only the slice being copied is scanned for its largest
           character, not the whole source string. */
        widest = _PyUnicode_FindMaxChar(str, start, stop);
        if (unicode_writer_prepare(writer, length, widest) == -1) {
            return -1;
        }

        assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer));
        copy_characters(writer->buffer, writer->pos,
                        str, start, length);
        writer->pos += length;
    }
    return 0;
}
/* Store the single character `ch` at the writer's current position and
   advance that position by one.

   Returns 0 on success and -1 when unicode_writer_prepare() reports
   failure; in the failure case nothing is written and the position is
   unchanged by this function. */
Py_LOCAL_INLINE(int)
unicode_writer_write_char(
    unicode_writer_t *writer,
    Py_UCS4 ch)
{
    /* `ch` is passed both as the character to store and as the maximum
       character value the prepared buffer must be able to hold. */
    const int status = unicode_writer_prepare(writer, 1, ch);

    if (status == -1) {
        return -1;
    }

    assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer));
    PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
    writer->pos += 1;
    return 0;
}
Py_LOCAL(PyObject *) Py_LOCAL(PyObject *)
unicode_writer_finish(unicode_writer_t *writer) unicode_writer_finish(unicode_writer_t *writer)
{ {
...@@ -13791,6 +13750,8 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -13791,6 +13750,8 @@ PyUnicode_Format(PyObject *format, PyObject *args)
void *fmt; void *fmt;
enum PyUnicode_Kind kind, fmtkind; enum PyUnicode_Kind kind, fmtkind;
unicode_writer_t writer; unicode_writer_t writer;
Py_ssize_t sublen;
Py_UCS4 maxchar;
if (format == NULL || args == NULL) { if (format == NULL || args == NULL) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
...@@ -13833,8 +13794,15 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -13833,8 +13794,15 @@ PyUnicode_Format(PyObject *format, PyObject *args)
} }
if (fmtcnt < 0) if (fmtcnt < 0)
fmtpos--; fmtpos--;
if (unicode_writer_write_str(&writer, uformat, nonfmtpos, fmtpos - nonfmtpos) < 0) sublen = fmtpos - nonfmtpos;
maxchar = _PyUnicode_FindMaxChar(uformat,
nonfmtpos, nonfmtpos + sublen);
if (unicode_writer_prepare(&writer, sublen, maxchar) == -1)
goto onError; goto onError;
copy_characters(writer.buffer, writer.pos,
uformat, nonfmtpos, sublen);
writer.pos += sublen;
} }
else { else {
/* Got a format specifier */ /* Got a format specifier */
...@@ -13849,6 +13817,8 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -13849,6 +13817,8 @@ PyUnicode_Format(PyObject *format, PyObject *args)
PyObject *v = NULL; PyObject *v = NULL;
void *pbuf = NULL; void *pbuf = NULL;
Py_ssize_t pindex, len; Py_ssize_t pindex, len;
Py_UCS4 bufmaxchar;
Py_ssize_t buflen;
fmtpos++; fmtpos++;
c = PyUnicode_READ(fmtkind, fmt, fmtpos); c = PyUnicode_READ(fmtkind, fmt, fmtpos);
...@@ -13991,8 +13961,10 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -13991,8 +13961,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
} }
if (c == '%') { if (c == '%') {
if (unicode_writer_write_char(&writer, '%') < 0) if (unicode_writer_prepare(&writer, 1, '%') == -1)
goto onError; goto onError;
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '%');
writer.pos += 1;
continue; continue;
} }
...@@ -14126,10 +14098,35 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -14126,10 +14098,35 @@ PyUnicode_Format(PyObject *format, PyObject *args)
} }
if (width < len) if (width < len)
width = len; width = len;
/* Compute the length and maximum character of the
written characters */
bufmaxchar = 127;
if (!(flags & F_LJUST)) {
if (sign) {
if ((width-1) > len)
bufmaxchar = Py_MAX(bufmaxchar, fill);
}
else {
if (width > len)
bufmaxchar = Py_MAX(bufmaxchar, fill);
}
}
maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
bufmaxchar = Py_MAX(bufmaxchar, maxchar);
buflen = width;
if (sign && len == width)
buflen++;
if (unicode_writer_prepare(&writer, buflen, bufmaxchar) == -1)
goto onError;
/* Write characters */
if (sign) { if (sign) {
if (fill != ' ') { if (fill != ' ') {
if (unicode_writer_write_char(&writer, signchar) < 0) PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar);
goto onError; writer.pos += 1;
} }
if (width > len) if (width > len)
width--; width--;
...@@ -14138,8 +14135,6 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -14138,8 +14135,6 @@ PyUnicode_Format(PyObject *format, PyObject *args)
assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c); assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c);
if (fill != ' ') { if (fill != ' ') {
if (unicode_writer_prepare(&writer, 2, 127) < 0)
goto onError;
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
writer.pos += 2; writer.pos += 2;
...@@ -14151,43 +14146,35 @@ PyUnicode_Format(PyObject *format, PyObject *args) ...@@ -14151,43 +14146,35 @@ PyUnicode_Format(PyObject *format, PyObject *args)
len -= 2; len -= 2;
} }
if (width > len && !(flags & F_LJUST)) { if (width > len && !(flags & F_LJUST)) {
Py_ssize_t sublen;
sublen = width - len; sublen = width - len;
if (unicode_writer_prepare(&writer, sublen, fill) < 0)
goto onError;
FILL(writer.kind, writer.data, fill, writer.pos, sublen); FILL(writer.kind, writer.data, fill, writer.pos, sublen);
writer.pos += sublen; writer.pos += sublen;
width = len; width = len;
} }
if (fill == ' ') { if (fill == ' ') {
if (sign) { if (sign) {
if (unicode_writer_write_char(&writer, signchar) < 0) PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar);
goto onError; writer.pos += 1;
} }
if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
assert(PyUnicode_READ(kind, pbuf, pindex+1) == c); assert(PyUnicode_READ(kind, pbuf, pindex+1) == c);
if (unicode_writer_prepare(&writer, 2, 127) < 0)
goto onError;
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
writer.pos += 2; writer.pos += 2;
pindex += 2; pindex += 2;
} }
} }
/* Copy all characters, preserving len */ copy_characters(writer.buffer, writer.pos,
if (unicode_writer_write_str(&writer, temp, pindex, len) < 0) temp, pindex, len);
goto onError; writer.pos += len;
if (width > len) { if (width > len) {
Py_ssize_t sublen = width - len; sublen = width - len;
if (unicode_writer_prepare(&writer, sublen, ' ') < 0)
goto onError;
FILL(writer.kind, writer.data, ' ', writer.pos, sublen); FILL(writer.kind, writer.data, ' ', writer.pos, sublen);
writer.pos += sublen; writer.pos += sublen;
} }
if (dict && (argidx < arglen) && c != '%') { if (dict && (argidx < arglen) && c != '%') {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"not all arguments converted during string formatting"); "not all arguments converted during string formatting");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment