Commit 59423e3d authored by Victor Stinner, committed by GitHub

bpo-33954: Fix _PyUnicode_InsertThousandsGrouping() (GH-10623)

Fix str.format(), float.__format__() and complex.__format__() methods
for non-ASCII decimal point when using the "n" formatter.

Changes:

* Rewrite _PyUnicode_InsertThousandsGrouping(): it now requires
  a _PyUnicodeWriter object for the buffer and a Python str object
  for digits.
* Rename FILL() macro to unicode_fill(), convert it to static inline function,
  add "assert(0 <= start);" and rework its code.
parent df108dc6
......@@ -2135,10 +2135,10 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
see Objects/stringlib/localeutil.h */
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
PyObject *unicode,
Py_ssize_t index,
_PyUnicodeWriter *writer,
Py_ssize_t n_buffer,
void *digits,
PyObject *digits,
Py_ssize_t d_pos,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
......
Fix :meth:`str.format`, :meth:`float.__format__` and
:meth:`complex.__format__` methods for non-ASCII decimal point when using
the "n" formatter.
/* stringlib: locale related helpers implementation */
#include <locale.h>
#if !STRINGLIB_IS_UNICODE
# error "localeutil.h is specific to Unicode"
#endif
/* _PyUnicode_InsertThousandsGrouping() helper functions */
typedef struct {
const char *grouping;
char previous;
Py_ssize_t i; /* Where we're currently pointing in grouping. */
} STRINGLIB(GroupGenerator);
} GroupGenerator;
static void
STRINGLIB(GroupGenerator_init)(STRINGLIB(GroupGenerator) *self, const char *grouping)
GroupGenerator_init(GroupGenerator *self, const char *grouping)
{
self->grouping = grouping;
self->i = 0;
self->previous = 0;
}
/* Returns the next grouping, or 0 to signify end. */
static Py_ssize_t
STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
GroupGenerator_next(GroupGenerator *self)
{
/* Note that we don't really do much error checking here. If a
grouping string contains just CHAR_MAX, for example, then just
......@@ -43,138 +39,44 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
}
}
/* Fill in some digits, leading zeros, and thousands separator. All
are optional, depending on when we're called. */
static void
STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
Py_ssize_t n_chars, Py_ssize_t n_zeros, STRINGLIB_CHAR* thousands_sep,
Py_ssize_t thousands_sep_len)
InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
PyObject *digits, Py_ssize_t *digits_pos,
Py_ssize_t n_chars, Py_ssize_t n_zeros,
PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
Py_UCS4 *maxchar)
{
Py_ssize_t i;
if (!writer) {
/* if maxchar > 127, maxchar is already set */
if (*maxchar == 127 && thousands_sep) {
Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
*maxchar = Py_MAX(*maxchar, maxchar2);
}
return;
}
if (thousands_sep) {
*buffer_end -= thousands_sep_len;
*buffer_pos -= thousands_sep_len;
/* Copy the thousands_sep chars into the buffer. */
memcpy(*buffer_end, thousands_sep,
thousands_sep_len * STRINGLIB_SIZEOF_CHAR);
}
*buffer_end -= n_chars;
*digits_end -= n_chars;
memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR));
*buffer_end -= n_zeros;
for (i = 0; i < n_zeros; i++)
(*buffer_end)[i] = '0';
}
/**
* InsertThousandsGrouping:
* @buffer: A pointer to the start of a string.
* @n_buffer: Number of characters in @buffer.
* @digits: A pointer to the digits we're reading from. If count
* is non-NULL, this is unused.
* @n_digits: The number of digits in the string, in which we want
* to put the grouping chars.
* @min_width: The minimum width of the digits in the output string.
* Output will be zero-padded on the left to fill.
* @grouping: see definition in localeconv().
* @thousands_sep: see definition in localeconv().
*
* There are 2 modes: counting and filling. If @buffer is NULL,
* we are in counting mode, else filling mode.
* If counting, the required buffer size is returned.
* If filling, we know the buffer will be large enough, so we don't
* need to pass in the buffer size.
* Inserts thousand grouping characters (as defined by grouping and
* thousands_sep) into the string between buffer and buffer+n_digits.
*
* Return value: 0 on error, else 1. Note that no error can occur if
* count is non-NULL.
*
* This name won't be used, the includer of this file should define
* it to be the actual function name, based on unicode or string.
*
* As closely as possible, this code mimics the logic in decimal.py's
_insert_thousands_sep().
**/
static Py_ssize_t
STRINGLIB(InsertThousandsGrouping)(
STRINGLIB_CHAR *buffer,
Py_ssize_t n_buffer,
STRINGLIB_CHAR *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
STRINGLIB_CHAR *thousands_sep,
Py_ssize_t thousands_sep_len)
{
Py_ssize_t count = 0;
Py_ssize_t n_zeros;
int loop_broken = 0;
int use_separator = 0; /* First time through, don't append the
separator. They only go between
groups. */
STRINGLIB_CHAR *buffer_end = NULL;
STRINGLIB_CHAR *digits_end = NULL;
Py_ssize_t l;
Py_ssize_t n_chars;
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
be looked at */
/* A generator that returns all of the grouping widths, until it
returns 0. */
STRINGLIB(GroupGenerator) groupgen;
STRINGLIB(GroupGenerator_init)(&groupgen, grouping);
if (buffer) {
buffer_end = buffer + n_buffer;
digits_end = digits + n_digits;
}
while ((l = STRINGLIB(GroupGenerator_next)(&groupgen)) > 0) {
l = Py_MIN(l, Py_MAX(Py_MAX(remaining, min_width), 1));
n_zeros = Py_MAX(0, l - remaining);
n_chars = Py_MAX(0, Py_MIN(remaining, l));
/* Use n_zero zero's and n_chars chars */
/* Count only, don't do anything. */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) {
/* Copy into the output buffer. */
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
use_separator ? thousands_sep : NULL, thousands_sep_len);
}
/* Use a separator next time. */
use_separator = 1;
remaining -= n_chars;
min_width -= l;
if (remaining <= 0 && min_width <= 0) {
loop_broken = 1;
break;
}
min_width -= thousands_sep_len;
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
thousands_sep, 0,
thousands_sep_len);
}
if (!loop_broken) {
/* We left the loop without using a break statement. */
l = Py_MAX(Py_MAX(remaining, min_width), 1);
n_zeros = Py_MAX(0, l - remaining);
n_chars = Py_MAX(0, Py_MIN(remaining, l));
/* Use n_zero zero's and n_chars chars */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) {
/* Copy into the output buffer. */
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
use_separator ? thousands_sep : NULL, thousands_sep_len);
}
*buffer_pos -= n_chars;
*digits_pos -= n_chars;
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
digits, *digits_pos,
n_chars);
if (n_zeros) {
*buffer_pos -= n_zeros;
enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);
void *data = PyUnicode_DATA(writer->buffer);
unicode_fill(kind, data, '0', *buffer_pos, n_zeros);
}
return count;
}
This diff is collapsed.
......@@ -462,7 +462,8 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
/* not all fields of format are used. for example, precision is
unused. should this take discrete params in order to be more clear
about what it does? or is passing a single format parameter easier
and more efficient enough to justify a little obfuscation? */
and more efficient enough to justify a little obfuscation?
Return -1 on error. */
static Py_ssize_t
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
......@@ -541,9 +542,12 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
Py_UCS4 grouping_maxchar;
spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
NULL, 0,
0, NULL,
spec->n_digits, spec->n_min_width,
NULL, 0, spec->n_digits,
spec->n_min_width,
locale->grouping, locale->thousands_sep, &grouping_maxchar);
if (spec->n_grouped_digits == -1) {
return -1;
}
*maxchar = Py_MAX(*maxchar, grouping_maxchar);
}
......@@ -635,26 +639,14 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
/* Only for type 'c' special case, it has no digits. */
if (spec->n_digits != 0) {
/* Fill the digits with InsertThousandsGrouping. */
char *pdigits;
if (PyUnicode_READY(digits))
return -1;
pdigits = PyUnicode_DATA(digits);
if (PyUnicode_KIND(digits) < kind) {
pdigits = _PyUnicode_AsKind(digits, kind);
if (pdigits == NULL)
return -1;
}
r = _PyUnicode_InsertThousandsGrouping(
writer->buffer, writer->pos,
spec->n_grouped_digits,
pdigits + kind * d_pos,
spec->n_digits, spec->n_min_width,
writer, spec->n_grouped_digits,
digits, d_pos, spec->n_digits,
spec->n_min_width,
locale->grouping, locale->thousands_sep, NULL);
if (r == -1)
return -1;
assert(r == spec->n_grouped_digits);
if (PyUnicode_KIND(digits) < kind)
PyMem_Free(pdigits);
d_pos += spec->n_digits;
}
if (toupper) {
......@@ -994,6 +986,9 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
inumeric_chars + n_digits, n_remainder, 0,
&locale, format, &maxchar);
if (n_total == -1) {
goto done;
}
/* Allocate the memory. */
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
......@@ -1139,6 +1134,9 @@ format_float_internal(PyObject *value,
n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
index + n_digits, n_remainder, has_decimal,
&locale, format, &maxchar);
if (n_total == -1) {
goto done;
}
/* Allocate the memory. */
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
......@@ -1322,6 +1320,9 @@ format_complex_internal(PyObject *value,
i_re, i_re + n_re_digits, n_re_remainder,
re_has_decimal, &locale, &tmp_format,
&maxchar);
if (n_re_total == -1) {
goto done;
}
/* Same formatting, but always include a sign, unless the real part is
* going to be omitted, in which case we use whatever sign convention was
......@@ -1332,6 +1333,9 @@ format_complex_internal(PyObject *value,
i_im, i_im + n_im_digits, n_im_remainder,
im_has_decimal, &locale, &tmp_format,
&maxchar);
if (n_im_total == -1) {
goto done;
}
if (skip_re)
n_re_total = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment