Commit 8f674ccd authored by Victor Stinner

Close #17694: Add minimum length to _PyUnicodeWriter

 * Also add a min_char attribute to the _PyUnicodeWriter structure (currently unused)
 * _PyUnicodeWriter_Init() no longer takes any argument besides the writer itself:
   min_length and overallocate must now be set explicitly
 * In error handlers, only enable overallocation if the replacement string
   is longer than 1 character
 * CJK decoders don't use overallocation anymore
 * Set min_length, instead of preallocating memory using
   _PyUnicodeWriter_Prepare(), in many decoders
 * _PyUnicode_DecodeUnicodeInternal() checks for integer overflow
parent e84a51c3
...@@ -898,22 +898,28 @@ typedef struct { ...@@ -898,22 +898,28 @@ typedef struct {
Py_UCS4 maxchar; Py_UCS4 maxchar;
Py_ssize_t size; Py_ssize_t size;
Py_ssize_t pos; Py_ssize_t pos;
/* minimum length of the buffer when overallocation is enabled,
see _PyUnicodeWriter_Init() */ /* minimum number of allocated characters (default: 0) */
Py_ssize_t min_length; Py_ssize_t min_length;
/* minimum character (default: 127, ASCII) */
Py_UCS4 min_char;
/* If non-zero, overallocate the buffer by 25% (default: 0). */
unsigned char overallocate; unsigned char overallocate;
/* If readonly is 1, buffer is a shared string (cannot be modified) /* If readonly is 1, buffer is a shared string (cannot be modified)
and size is set to 0. */ and size is set to 0. */
unsigned char readonly; unsigned char readonly;
} _PyUnicodeWriter ; } _PyUnicodeWriter ;
/* Initialize a Unicode writer. /* Initialize a Unicode writer.
*
If min_length is greater than zero, _PyUnicodeWriter_Prepare() * By default, the minimum buffer size is 0 character and overallocation is
overallocates the buffer and min_length is the minimum length in characters * disabled. Set min_length, min_char and overallocate attributes to control
of the buffer. */ * the allocation of the buffer. */
PyAPI_FUNC(void) PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length); _PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
/* Prepare the buffer to write 'length' characters /* Prepare the buffer to write 'length' characters
with the specified maximum character. with the specified maximum character.
......
...@@ -633,7 +633,8 @@ MultibyteCodec_Decode(MultibyteCodecObject *self, ...@@ -633,7 +633,8 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
return make_tuple(PyUnicode_New(0, 0), 0); return make_tuple(PyUnicode_New(0, 0), 0);
} }
_PyUnicodeWriter_Init(&buf.writer, datalen); _PyUnicodeWriter_Init(&buf.writer);
buf.writer.min_length = datalen;
buf.excobj = NULL; buf.excobj = NULL;
buf.inbuf = buf.inbuf_top = (unsigned char *)data; buf.inbuf = buf.inbuf_top = (unsigned char *)data;
buf.inbuf_end = buf.inbuf_top + datalen; buf.inbuf_end = buf.inbuf_top + datalen;
...@@ -839,7 +840,7 @@ decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data, ...@@ -839,7 +840,7 @@ decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
{ {
buf->inbuf = buf->inbuf_top = (const unsigned char *)data; buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
buf->inbuf_end = buf->inbuf_top + size; buf->inbuf_end = buf->inbuf_top + size;
_PyUnicodeWriter_Init(&buf->writer, size); buf->writer.min_length += size;
return 0; return 0;
} }
...@@ -1037,7 +1038,7 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self, ...@@ -1037,7 +1038,7 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
data = pdata.buf; data = pdata.buf;
size = pdata.len; size = pdata.len;
_PyUnicodeWriter_Init(&buf.writer, 1); _PyUnicodeWriter_Init(&buf.writer);
buf.excobj = NULL; buf.excobj = NULL;
origpending = self->pendingsize; origpending = self->pendingsize;
...@@ -1241,7 +1242,7 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, ...@@ -1241,7 +1242,7 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
if (sizehint == 0) if (sizehint == 0)
return PyUnicode_New(0, 0); return PyUnicode_New(0, 0);
_PyUnicodeWriter_Init(&buf.writer, 1); _PyUnicodeWriter_Init(&buf.writer);
buf.excobj = NULL; buf.excobj = NULL;
cres = NULL; cres = NULL;
......
...@@ -705,7 +705,7 @@ complex__format__(PyObject* self, PyObject* args) ...@@ -705,7 +705,7 @@ complex__format__(PyObject* self, PyObject* args)
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL; return NULL;
_PyUnicodeWriter_Init(&writer, 0); _PyUnicodeWriter_Init(&writer);
ret = _PyComplex_FormatAdvancedWriter( ret = _PyComplex_FormatAdvancedWriter(
&writer, &writer,
self, self,
......
...@@ -1711,7 +1711,7 @@ float__format__(PyObject *self, PyObject *args) ...@@ -1711,7 +1711,7 @@ float__format__(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL; return NULL;
_PyUnicodeWriter_Init(&writer, 0); _PyUnicodeWriter_Init(&writer);
ret = _PyFloat_FormatAdvancedWriter( ret = _PyFloat_FormatAdvancedWriter(
&writer, &writer,
self, self,
......
...@@ -4379,7 +4379,7 @@ long__format__(PyObject *self, PyObject *args) ...@@ -4379,7 +4379,7 @@ long__format__(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL; return NULL;
_PyUnicodeWriter_Init(&writer, 0); _PyUnicodeWriter_Init(&writer);
ret = _PyLong_FormatAdvancedWriter( ret = _PyLong_FormatAdvancedWriter(
&writer, &writer,
self, self,
......
...@@ -906,7 +906,6 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, ...@@ -906,7 +906,6 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number) int recursion_depth, AutoNumber *auto_number)
{ {
_PyUnicodeWriter writer; _PyUnicodeWriter writer;
Py_ssize_t minlen;
/* check the recursion level */ /* check the recursion level */
if (recursion_depth <= 0) { if (recursion_depth <= 0) {
...@@ -915,8 +914,9 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, ...@@ -915,8 +914,9 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
return NULL; return NULL;
} }
minlen = PyUnicode_GET_LENGTH(input->str) + 100; _PyUnicodeWriter_Init(&writer);
_PyUnicodeWriter_Init(&writer, minlen); writer.overallocate = 1;
writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
if (!do_markup(input, args, kwargs, &writer, recursion_depth, if (!do_markup(input, args, kwargs, &writer, recursion_depth,
auto_number)) { auto_number)) {
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment