Commit 38fd5b64 authored by Tim Peters

Derived from Martin's SF patch 110609: support unbounded ints in %d,i,u,x,X,o formats.

Note a curious extension to the std C rules:  x, X and o formatting can never produce
a sign character in C, so the '+' and ' ' flags are meaningless for them.  But
unbounded ints *can* produce a sign character under these conversions (no fixed-
width bitstring is wide enough to hold all negative values in 2's-comp form).  So
these flags become meaningful in Python when formatting a Python long which is too
big to fit in a C long.  This required shuffling around existing code, which hacked
x and X conversions to death when both the '#' and '0' flags were specified:  the
hacks weren't strong enough to deal with the simultaneous possibility of the ' ' or
'+' flags too, since signs were always meaningless before for x and X conversions.
Isomorphic shuffling was required in unicodeobject.c.
Also added dozens of non-trivial new unbounded-int test cases to test_format.py.
parent 31575ce8
......@@ -59,6 +59,8 @@ extern DL_IMPORT(void) PyString_Concat(PyObject **, PyObject *);
extern DL_IMPORT(void) PyString_ConcatAndDel(PyObject **, PyObject *);
extern DL_IMPORT(int) _PyString_Resize(PyObject **, int);
extern DL_IMPORT(PyObject *) PyString_Format(PyObject *, PyObject *);
extern DL_IMPORT(PyObject *) _PyString_FormatLong(PyObject*, int, int,
int, char**, int*);
#ifdef INTERN_STRINGS
extern DL_IMPORT(void) PyString_InternInPlace(PyObject **);
......
......@@ -6,32 +6,36 @@ import string, sys
# they crash python)
# test on unicode strings as well
overflowok = 1
def testformat(formatstr, args, output=None):
if verbose:
if output:
print "%s %% %s =? %s ..." %\
(repr(formatstr), repr(args), repr(output)),
else:
print "%s %% %s works? ..." % (repr(formatstr), repr(args)),
try:
result = formatstr % args
except OverflowError:
if verbose:
print 'overflow (this is fine)'
else:
if output and result != output:
if verbose:
print 'no'
print "%s %% %s == %s != %s" %\
(repr(formatstr), repr(args), repr(result), repr(output))
else:
if verbose:
print 'yes'
if verbose:
if output:
print "%s %% %s =? %s ..." %\
(repr(formatstr), repr(args), repr(output)),
else:
print "%s %% %s works? ..." % (repr(formatstr), repr(args)),
try:
result = formatstr % args
except OverflowError:
if not overflowok:
raise
if verbose:
print 'overflow (this is fine)'
else:
if output and result != output:
if verbose:
print 'no'
print "%s %% %s == %s != %s" %\
(repr(formatstr), repr(args), repr(result), repr(output))
else:
if verbose:
print 'yes'
def testboth(formatstr, *args):
testformat(formatstr, *args)
testformat(unicode(formatstr), *args)
testformat(formatstr, *args)
testformat(unicode(formatstr), *args)
testboth("%.1d", (1,), "1")
testboth("%.*d", (sys.maxint,1)) # expect overflow
......@@ -50,3 +54,112 @@ testboth("%#.*g", (110, -1.e+100/3.))
# test some ridiculously large precision, expect overflow
testboth('%12.*f', (123456, 1.0))
# Formatting of long integers. Overflow is not ok
overflowok = 0
testboth("%x", 10L, "a")
testboth("%x", 100000000000L, "174876e800")
testboth("%o", 10L, "12")
testboth("%o", 100000000000L, "1351035564000")
testboth("%d", 10L, "10")
testboth("%d", 100000000000L, "100000000000")
# Make sure big is too big to fit in a 64-bit int, else the unbounded
# int formatting will be sidestepped on some machines. That's vital,
# because bitwise (x, X, o) formats of regular Python ints never
# produce a sign ("+" or "-").
big = 123456789012345678901234567890L
testboth("%d", big, "123456789012345678901234567890")
testboth("%d", -big, "-123456789012345678901234567890")
testboth("%5d", -big, "-123456789012345678901234567890")
testboth("%31d", -big, "-123456789012345678901234567890")
testboth("%32d", -big, " -123456789012345678901234567890")
testboth("%-32d", -big, "-123456789012345678901234567890 ")
testboth("%032d", -big, "-0123456789012345678901234567890")
testboth("%-032d", -big, "-123456789012345678901234567890 ")
testboth("%034d", -big, "-000123456789012345678901234567890")
testboth("%034d", big, "0000123456789012345678901234567890")
testboth("%0+34d", big, "+000123456789012345678901234567890")
testboth("%+34d", big, " +123456789012345678901234567890")
testboth("%34d", big, " 123456789012345678901234567890")
testboth("%.2d", big, "123456789012345678901234567890")
testboth("%.30d", big, "123456789012345678901234567890")
testboth("%.31d", big, "0123456789012345678901234567890")
testboth("%32.31d", big, " 0123456789012345678901234567890")
big = 0x1234567890abcdef12345L # 21 hex digits
testboth("%x", big, "1234567890abcdef12345")
testboth("%x", -big, "-1234567890abcdef12345")
testboth("%5x", -big, "-1234567890abcdef12345")
testboth("%22x", -big, "-1234567890abcdef12345")
testboth("%23x", -big, " -1234567890abcdef12345")
testboth("%-23x", -big, "-1234567890abcdef12345 ")
testboth("%023x", -big, "-01234567890abcdef12345")
testboth("%-023x", -big, "-1234567890abcdef12345 ")
testboth("%025x", -big, "-0001234567890abcdef12345")
testboth("%025x", big, "00001234567890abcdef12345")
testboth("%0+25x", big, "+0001234567890abcdef12345")
testboth("%+25x", big, " +1234567890abcdef12345")
testboth("%25x", big, " 1234567890abcdef12345")
testboth("%.2x", big, "1234567890abcdef12345")
testboth("%.21x", big, "1234567890abcdef12345")
testboth("%.22x", big, "01234567890abcdef12345")
testboth("%23.22x", big, " 01234567890abcdef12345")
testboth("%-23.22x", big, "01234567890abcdef12345 ")
testboth("%X", big, "1234567890ABCDEF12345")
testboth("%#X", big, "0X1234567890ABCDEF12345")
testboth("%#x", big, "0x1234567890abcdef12345")
testboth("%#x", -big, "-0x1234567890abcdef12345")
testboth("%#.23x", -big, "-0x001234567890abcdef12345")
testboth("%#+.23x", big, "+0x001234567890abcdef12345")
testboth("%# .23x", big, " 0x001234567890abcdef12345")
testboth("%#+.23X", big, "+0X001234567890ABCDEF12345")
testboth("%#-+.23X", big, "+0X001234567890ABCDEF12345")
testboth("%#-+26.23X", big, "+0X001234567890ABCDEF12345")
testboth("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ")
testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345")
# next one gets two leading zeroes from precision, and another from the
# 0 flag and the width
testboth("%#+027.23X", big, "+0X0001234567890ABCDEF12345")
# same, except no 0 flag
testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345")
big = 012345670123456701234567012345670L # 32 octal digits
testboth("%o", big, "12345670123456701234567012345670")
testboth("%o", -big, "-12345670123456701234567012345670")
testboth("%5o", -big, "-12345670123456701234567012345670")
testboth("%33o", -big, "-12345670123456701234567012345670")
testboth("%34o", -big, " -12345670123456701234567012345670")
testboth("%-34o", -big, "-12345670123456701234567012345670 ")
testboth("%034o", -big, "-012345670123456701234567012345670")
testboth("%-034o", -big, "-12345670123456701234567012345670 ")
testboth("%036o", -big, "-00012345670123456701234567012345670")
testboth("%036o", big, "000012345670123456701234567012345670")
testboth("%0+36o", big, "+00012345670123456701234567012345670")
testboth("%+36o", big, " +12345670123456701234567012345670")
testboth("%36o", big, " 12345670123456701234567012345670")
testboth("%.2o", big, "12345670123456701234567012345670")
testboth("%.32o", big, "12345670123456701234567012345670")
testboth("%.33o", big, "012345670123456701234567012345670")
testboth("%34.33o", big, " 012345670123456701234567012345670")
testboth("%-34.33o", big, "012345670123456701234567012345670 ")
testboth("%o", big, "12345670123456701234567012345670")
testboth("%#o", big, "012345670123456701234567012345670")
testboth("%#o", -big, "-012345670123456701234567012345670")
testboth("%#.34o", -big, "-0012345670123456701234567012345670")
testboth("%#+.34o", big, "+0012345670123456701234567012345670")
testboth("%# .34o", big, " 0012345670123456701234567012345670")
testboth("%#+.34o", big, "+0012345670123456701234567012345670")
testboth("%#-+.34o", big, "+0012345670123456701234567012345670")
testboth("%#-+37.34o", big, "+0012345670123456701234567012345670 ")
testboth("%#+37.34o", big, " +0012345670123456701234567012345670")
# next one gets one leading zero from precision
testboth("%.33o", big, "012345670123456701234567012345670")
# base marker shouldn't change that, since "0" is redundant
testboth("%#.33o", big, "012345670123456701234567012345670")
# but reduce precision, and base marker should add a zero
testboth("%#.32o", big, "012345670123456701234567012345670")
# one leading zero from precision, and another from "0" flag & width
testboth("%034.33o", big, "0012345670123456701234567012345670")
# base marker shouldn't change that
testboth("%0#34.33o", big, "0012345670123456701234567012345670")
......@@ -2427,6 +2427,13 @@ getnextarg(PyObject *args, int arglen, int *p_argidx)
return NULL;
}
/* Format codes
* F_LJUST '-'
* F_SIGN '+'
* F_BLANK ' '
* F_ALT '#'
* F_ZERO '0'
*/
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)
#define F_BLANK (1<<2)
......@@ -2464,22 +2471,164 @@ formatfloat(char *buf, size_t buflen, int flags,
return strlen(buf);
}
/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
 * Python's regular ints.
 * Return value:  a new PyString*, or NULL if error (with an exception set).
 *     .  *pbuf is set to point into it,
 *        *plen set to the # of chars following that.
 *        Caller must decref it when done using pbuf.
 *        On error, *pbuf and *plen are left untouched.
 *        The string starting at *pbuf is of the form
 *            "-"? ("0x" | "0X")? digit+
 *        "0x"/"0X" are present only for x and X conversions, with F_ALT
 *        set in flags.  The case of hex digits will be correct.
 *        There will be at least prec digits, zero-filled on the left if
 *        necessary to get that many.
 * val      object to be converted
 * flags    bitmask of format flags; only F_ALT is looked at
 * prec     minimum number of digits; 0-fill on left if needed
 * type     a character in [duoxX]; u acts the same as d
 *
 * CAUTION:  o, x and X conversions on regular ints can never
 * produce a '-' sign, but can for Python's unbounded ints.
 */
PyObject*
_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
                     char **pbuf, int *plen)
{
    PyObject *result = NULL;
    char *buf;
    int i;
    int sign;           /* 1 if '-', else 0 */
    int len;            /* number of characters */
    int numdigits;      /* len == numnondigits + numdigits */
    int numnondigits = 0;

    switch (type) {
    case 'd':
    case 'u':
        result = val->ob_type->tp_str(val);
        break;
    case 'o':
        result = val->ob_type->tp_as_number->nb_oct(val);
        break;
    case 'x':
    case 'X':
        /* Account for the "0x" base marker. */
        numnondigits = 2;
        result = val->ob_type->tp_as_number->nb_hex(val);
        break;
    default:
        assert(!"'type' not in [duoxX]");
        /* With asserts compiled out, don't return NULL without
           an exception set. */
        PyErr_BadInternalCall();
        return NULL;
    }
    if (!result)
        return NULL;

    /* To modify the string in-place, there can only be one reference. */
    if (result->ob_refcnt != 1) {
        /* We still own our reference; drop it so it isn't leaked. */
        Py_DECREF(result);
        PyErr_BadInternalCall();
        return NULL;
    }
    buf = PyString_AsString(result);
    if (!buf) {
        /* The conversion slot did not hand back a string object. */
        Py_DECREF(result);
        return NULL;
    }
    len = PyString_Size(result);
    /* Strip the trailing 'L' of the long's string form, if present. */
    if (len > 0 && buf[len-1] == 'L') {
        --len;
        buf[len] = '\0';
    }
    sign = buf[0] == '-';
    numnondigits += sign;
    numdigits = len - numnondigits;
    assert(numdigits > 0);

    /* Get rid of base marker unless F_ALT */
    if ((flags & F_ALT) == 0) {
        /* Need to skip 0x, 0X or 0. */
        int skipped = 0;
        switch (type) {
        case 'o':
            assert(buf[sign] == '0');
            /* If 0 is only digit, leave it alone. */
            if (numdigits > 1) {
                skipped = 1;
                --numdigits;
            }
            break;
        case 'x':
        case 'X':
            assert(buf[sign] == '0');
            assert(buf[sign + 1] == 'x');
            skipped = 2;
            numnondigits -= 2;
            break;
        }
        if (skipped) {
            /* Slide the start forward over the marker, then
               re-plant the sign in front of the digits. */
            buf += skipped;
            len -= skipped;
            if (sign)
                buf[0] = '-';
        }
        assert(len == numnondigits + numdigits);
        assert(numdigits > 0);
    }

    /* Fill with leading zeroes to meet minimum precision (digit count). */
    if (prec > numdigits) {
        PyObject *r1 = PyString_FromStringAndSize(NULL,
                                                  numnondigits + prec);
        char *b1;
        if (!r1) {
            Py_DECREF(result);
            return NULL;
        }
        b1 = PyString_AS_STRING(r1);
        /* sign and/or base marker first ... */
        for (i = 0; i < numnondigits; ++i)
            *b1++ = *buf++;
        /* ... then the zero padding ... */
        for (i = 0; i < prec - numdigits; i++)
            *b1++ = '0';
        /* ... then the digits themselves. */
        for (i = 0; i < numdigits; i++)
            *b1++ = *buf++;
        *b1 = '\0';
        Py_DECREF(result);
        result = r1;
        buf = PyString_AS_STRING(result);
        len = numnondigits + prec;
    }

    /* Fix up case for hex conversions. */
    switch (type) {
    case 'x':
        /* Need to convert all upper case letters to lower case. */
        for (i = 0; i < len; i++)
            if (buf[i] >= 'A' && buf[i] <= 'F')
                buf[i] += 'a'-'A';
        break;
    case 'X':
        /* Need to convert 0x to 0X (and -0x to -0X). */
        if (buf[sign + 1] == 'x')
            buf[sign + 1] = 'X';
        break;
    }
    *pbuf = buf;
    *plen = len;
    return result;
}
static int
formatint(char *buf, size_t buflen, int flags,
int prec, int type, PyObject *v)
{
/* fmt = '%#.' + `prec` + 'l' + `type`
worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
char fmt[20];
worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
+ 1 + 1 = 24 */
char fmt[64]; /* plenty big enough! */
long x;
if (!PyArg_Parse(v, "l;int argument required", &x))
return -1;
if (prec < 0)
prec = 1;
sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
/* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
/* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
PyErr_SetString(PyExc_OverflowError,
"formatted integer is too long (precision too long?)");
return -1;
......@@ -2752,25 +2901,29 @@ PyString_Format(PyObject *format, PyObject *args)
case 'X':
if (c == 'i')
c = 'd';
pbuf = formatbuf;
len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
if (len < 0)
goto error;
sign = (c == 'd');
if (flags&F_ZERO) {
fill = '0';
if ((flags&F_ALT) &&
(c == 'x' || c == 'X') &&
pbuf[0] == '0' && pbuf[1] == c) {
*res++ = *pbuf++;
*res++ = *pbuf++;
rescnt -= 2;
len -= 2;
width -= 2;
if (width < 0)
width = 0;
}
if (PyLong_Check(v) && PyLong_AsLong(v) == -1
&& PyErr_Occurred()) {
/* Too big for a C long. */
PyErr_Clear();
temp = _PyString_FormatLong(v, flags,
prec, c, &pbuf, &len);
if (!temp)
goto error;
/* unbounded ints can always produce
a sign character! */
sign = 1;
}
else {
pbuf = formatbuf;
len = formatint(pbuf, sizeof(formatbuf),
flags, prec, c, v);
if (len < 0)
goto error;
/* only d conversion is signed */
sign = c == 'd';
}
if (flags & F_ZERO)
fill = '0';
break;
case 'e':
case 'E':
......@@ -2782,7 +2935,7 @@ PyString_Format(PyObject *format, PyObject *args)
if (len < 0)
goto error;
sign = 1;
if (flags&F_ZERO)
if (flags & F_ZERO)
fill = '0';
break;
case 'c':
......@@ -2807,11 +2960,11 @@ PyString_Format(PyObject *format, PyObject *args)
else if (flags & F_BLANK)
sign = ' ';
else
sign = '\0';
sign = 0;
}
if (width < len)
width = len;
if (rescnt < width + (sign != '\0')) {
if (rescnt < width + (sign != 0)) {
reslen -= rescnt;
rescnt = width + fmtcnt + 100;
reslen += rescnt;
......@@ -2827,14 +2980,36 @@ PyString_Format(PyObject *format, PyObject *args)
if (width > len)
width--;
}
if (width > len && !(flags&F_LJUST)) {
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
assert(pbuf[0] == '0');
assert(pbuf[1] == c);
if (fill != ' ') {
*res++ = *pbuf++;
*res++ = *pbuf++;
}
rescnt -= 2;
width -= 2;
if (width < 0)
width = 0;
len -= 2;
}
if (width > len && !(flags & F_LJUST)) {
do {
--rescnt;
*res++ = fill;
} while (--width > len);
}
if (sign && fill == ' ')
*res++ = sign;
if (fill == ' ') {
if (sign)
*res++ = sign;
if ((flags & F_ALT) &&
(c == 'x' || c == 'X')) {
assert(pbuf[0] == '0');
assert(pbuf[1] == c);
*res++ = *pbuf++;
*res++ = *pbuf++;
}
}
memcpy(res, pbuf, len);
res += len;
rescnt -= len;
......
......@@ -4668,6 +4668,25 @@ formatfloat(Py_UNICODE *buf,
return usprintf(buf, fmt, x);
}
/* Format a Python long for the unicode % operator.
 *
 * Delegates the actual conversion to _PyString_FormatLong (same val,
 * flags, prec and type arguments) and copies the resulting characters
 * one by one into a freshly allocated unicode object.
 *
 * Returns a new reference to the unicode object, or NULL if either the
 * string conversion or the unicode allocation failed.
 */
static PyObject*
formatlong(PyObject *val, int flags, int prec, int type)
{
    char *buf;
    int i, len;
    PyObject *str; /* temporary string object. */
    PyUnicodeObject *result;

    str = _PyString_FormatLong(val, flags, prec, type, &buf, &len);
    if (!str)
        return NULL;
    result = _PyUnicode_New(len);
    if (!result) {
        /* Allocation failed: release the temporary instead of
           dereferencing a NULL result and leaking str. */
        Py_DECREF(str);
        return NULL;
    }
    /* buf points into str, so str must stay alive until the copy
       is finished. */
    for (i = 0; i < len; i++)
        result->str[i] = buf[i];
    result->str[len] = 0;
    Py_DECREF(str);
    return (PyObject*)result;
}
static int
formatint(Py_UNICODE *buf,
size_t buflen,
......@@ -4677,8 +4696,9 @@ formatint(Py_UNICODE *buf,
PyObject *v)
{
/* fmt = '%#.' + `prec` + 'l' + `type`
worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
char fmt[20];
worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
+ 1 + 1 = 24*/
char fmt[64]; /* plenty big enough! */
long x;
x = PyInt_AsLong(v);
......@@ -5006,26 +5026,29 @@ PyObject *PyUnicode_Format(PyObject *format,
case 'X':
if (c == 'i')
c = 'd';
pbuf = formatbuf;
len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
flags, prec, c, v);
if (len < 0)
goto onError;
sign = (c == 'd');
if (flags & F_ZERO) {
fill = '0';
if ((flags&F_ALT) &&
(c == 'x' || c == 'X') &&
pbuf[0] == '0' && pbuf[1] == c) {
*res++ = *pbuf++;
*res++ = *pbuf++;
rescnt -= 2;
len -= 2;
width -= 2;
if (width < 0)
width = 0;
}
if (PyLong_Check(v) && PyLong_AsLong(v) == -1
&& PyErr_Occurred()) {
PyErr_Clear();
temp = formatlong(v, flags, prec, c);
if (!temp)
goto onError;
pbuf = PyUnicode_AS_UNICODE(temp);
len = PyUnicode_GET_SIZE(temp);
/* unbounded ints can always produce
a sign character! */
sign = 1;
}
else {
pbuf = formatbuf;
len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
flags, prec, c, v);
if (len < 0)
goto onError;
/* only d conversion is signed */
sign = c == 'd';
}
if (flags & F_ZERO)
fill = '0';
break;
case 'e':
......@@ -5039,7 +5062,7 @@ PyObject *PyUnicode_Format(PyObject *format,
if (len < 0)
goto onError;
sign = 1;
if (flags&F_ZERO)
if (flags & F_ZERO)
fill = '0';
break;
......@@ -5086,14 +5109,35 @@ PyObject *PyUnicode_Format(PyObject *format,
if (width > len)
width--;
}
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
assert(pbuf[0] == '0');
assert(pbuf[1] == c);
if (fill != ' ') {
*res++ = *pbuf++;
*res++ = *pbuf++;
}
rescnt -= 2;
width -= 2;
if (width < 0)
width = 0;
len -= 2;
}
if (width > len && !(flags & F_LJUST)) {
do {
--rescnt;
*res++ = fill;
} while (--width > len);
}
if (sign && fill == ' ')
*res++ = sign;
if (fill == ' ') {
if (sign)
*res++ = sign;
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
assert(pbuf[0] == '0');
assert(pbuf[1] == c);
*res++ = *pbuf++;
*res++ = *pbuf++;
}
}
memcpy(res, pbuf, len * sizeof(Py_UNICODE));
res += len;
rescnt -= len;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment