Commit 2b9a018a authored by Stefan Behnel

implement width based format specs for integer formatting

parent f3a876ff
...@@ -1634,10 +1634,33 @@ class CIntType(CNumericType): ...@@ -1634,10 +1634,33 @@ class CIntType(CNumericType):
def can_coerce_to_pyobject(self, env): def can_coerce_to_pyobject(self, env):
return True return True
@staticmethod
def _parse_format(format_spec):
    """Split an integer format spec into ``(format_type, width, padding)``.

    Only a small subset of the format mini-language is recognised: an
    optional ``-`` sign flag, an optional ``0`` zero-padding flag, an
    optional decimal width and an optional type character out of
    ``o``, ``d``, ``x``, ``X``.

    Returns a 3-tuple:

    * ``format_type`` -- one of ``'o'``, ``'d'``, ``'x'``, ``'X'``, or
      ``None`` if the spec is not supported by this parser.
    * ``width`` -- the minimum field width as an int (0 if none was given).
    * ``padding`` -- ``'0'`` if the zero flag was given, otherwise ``' '``.

    An empty or ``None`` spec is treated as plain decimal formatting.
    """
    padding = ' '
    if not format_spec:
        return ('d', 0, padding)

    format_type = format_spec[-1]
    if format_type in ('o', 'd', 'x', 'X'):
        prefix = format_spec[:-1]
    elif format_type.isdigit():
        # No explicit type character: the whole spec is flags + width.
        format_type = 'd'
        prefix = format_spec
    else:
        return (None, 0, padding)

    # Slicing (instead of indexing) keeps these checks safe on an empty prefix.
    if prefix[:1] == '-':
        prefix = prefix[1:]
    if prefix[:1] == '0':
        padding = '0'
        prefix = prefix.lstrip('0')

    if not prefix:
        # Only flags were given (e.g. '0d', '-d', '0'): same as having no width.
        return (format_type, 0, padding)
    if not prefix.isdigit():
        return (None, 0, padding)
    try:
        # isdigit() also accepts characters like superscript digits
        # that int() refuses, so guard the conversion.
        width = int(prefix)
    except ValueError:
        return (None, 0, padding)
    return (format_type, width, padding)
def can_coerce_to_pystring(self, env, format_spec=None): def can_coerce_to_pystring(self, env, format_spec=None):
if format_spec and format_spec not in ('o', 'd', 'x', 'X'): format_type, width, padding = self._parse_format(format_spec)
return False return format_type is not None and width <= 2**30
return True
def convert_to_pystring(self, cvalue, code, format_spec=None): def convert_to_pystring(self, cvalue, code, format_spec=None):
if self.to_pyunicode_utility is None: if self.to_pyunicode_utility is None:
...@@ -1650,7 +1673,8 @@ class CIntType(CNumericType): ...@@ -1650,7 +1673,8 @@ class CIntType(CNumericType):
else: else:
utility_code_name, to_pyunicode_utility = self.to_pyunicode_utility utility_code_name, to_pyunicode_utility = self.to_pyunicode_utility
code.globalstate.use_utility_code(to_pyunicode_utility) code.globalstate.use_utility_code(to_pyunicode_utility)
return "%s(%s, '%s')" % (utility_code_name, cvalue, format_spec or 'd') format_type, width, padding_char = self._parse_format(format_spec)
return "%s(%s, %d, '%s', '%s')" % (utility_code_name, cvalue, width, padding_char, format_type)
def create_to_py_utility_code(self, env): def create_to_py_utility_code(self, env):
if type(self).to_py_function is None: if type(self).to_py_function is None:
...@@ -1826,7 +1850,7 @@ class CPyUCS4IntType(CIntType): ...@@ -1826,7 +1850,7 @@ class CPyUCS4IntType(CIntType):
from_py_function = "__Pyx_PyObject_AsPy_UCS4" from_py_function = "__Pyx_PyObject_AsPy_UCS4"
def can_coerce_to_pystring(self, env, format_spec=None): def can_coerce_to_pystring(self, env, format_spec=None):
return False return False # does the right thing anyway
def create_from_py_utility_code(self, env): def create_from_py_utility_code(self, env):
env.use_utility_code(UtilityCode.load_cached("ObjectAsUCS4", "TypeConversion.c")) env.use_utility_code(UtilityCode.load_cached("ObjectAsUCS4", "TypeConversion.c"))
...@@ -1850,7 +1874,7 @@ class CPyUnicodeIntType(CIntType): ...@@ -1850,7 +1874,7 @@ class CPyUnicodeIntType(CIntType):
from_py_function = "__Pyx_PyObject_AsPy_UNICODE" from_py_function = "__Pyx_PyObject_AsPy_UNICODE"
def can_coerce_to_pystring(self, env, format_spec=None): def can_coerce_to_pystring(self, env, format_spec=None):
return False return False # does the right thing anyway
def create_from_py_utility_code(self, env): def create_from_py_utility_code(self, env):
env.use_utility_code(UtilityCode.load_cached("ObjectAsPyUnicode", "TypeConversion.c")) env.use_utility_code(UtilityCode.load_cached("ObjectAsPyUnicode", "TypeConversion.c"))
......
...@@ -570,24 +570,26 @@ static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value) { ...@@ -570,24 +570,26 @@ static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value) {
/////////////// CIntToPyUnicode.proto /////////////// /////////////// CIntToPyUnicode.proto ///////////////
static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value, char format_char); static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value, int width, char padding_char, char format_char);
/////////////// CIntToPyUnicode /////////////// /////////////// CIntToPyUnicode ///////////////
//@requires: BuildPyUnicode
// NOTE: inlining because "format_char" is always a constant, which collapses lots of code below // NOTE: inlining because most arguments are constant, which collapses lots of code below
static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value, char format_char) { static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value, int width, char padding_char, char format_char) {
// simple and conservative C string allocation on the stack: each byte gives at most 3 digits, plus sign // simple and conservative C string allocation on the stack: each byte gives at most 3 digits, plus sign
char digits[sizeof({{TYPE}})*3+2]; char digits[sizeof({{TYPE}})*3+2];
// dpos points to end of digits array + 1 at the beginning to allow for pre-decrement looping // dpos points to end of digits array + 1 at the beginning to allow for pre-decrement looping
char *dpos = digits + sizeof({{TYPE}})*3+2; char *dpos = digits + sizeof({{TYPE}})*3+2;
int length; Py_ssize_t ulength;
int length, prepend_sign;
{{TYPE}} remaining; {{TYPE}} remaining;
const {{TYPE}} neg_one = ({{TYPE}}) -1, const_zero = ({{TYPE}}) 0; const {{TYPE}} neg_one = ({{TYPE}}) -1, const_zero = ({{TYPE}}) 0;
const int is_unsigned = neg_one > const_zero; const int is_unsigned = neg_one > const_zero;
// single character unicode strings are cached in CPython => use PyUnicode_FromOrdinal() for them // single character unicode strings are cached in CPython => use PyUnicode_FromOrdinal() for them
if (unlikely((is_unsigned || value >= const_zero) && ( if (unlikely((is_unsigned || value >= const_zero) && (width <= 1) && (
(format_char == 'o') ? value <= 7 : (format_char == 'd') ? value <= 9 : value <= 15))) { (format_char == 'o') ? value <= 7 : (format_char == 'd') ? value <= 9 : value <= 15))) {
return PyUnicode_FromOrdinal( return PyUnicode_FromOrdinal(
((int) value) + (((int) value) <= 9 ? '0' : (format_char == 'x' ? 'a' : 'A') - 10)); ((int) value) + (((int) value) <= 9 ? '0' : (format_char == 'x' ? 'a' : 'A') - 10));
...@@ -620,37 +622,121 @@ static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value, char format_ch ...@@ -620,37 +622,121 @@ static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value, char format_ch
*(--dpos) = digit; *(--dpos) = digit;
++length; ++length;
} }
ulength = length;
prepend_sign = 0;
if (!is_unsigned && value <= neg_one) { if (!is_unsigned && value <= neg_one) {
if (padding_char == ' ' || width <= length + 1) {
*(--dpos) = '-'; *(--dpos) = '-';
++length; ++length;
} else {
prepend_sign = 1;
}
++ulength;
}
if (width > ulength) {
ulength = width;
} }
return __Pyx_PyUnicode_Build(ulength, dpos, length, prepend_sign, padding_char);
}
/////////////// BuildPyUnicode.proto ///////////////
static PyObject* __Pyx_PyUnicode_Build(Py_ssize_t ulength, char* chars, int clength,
int prepend_sign, char padding_char);
/////////////// BuildPyUnicode ///////////////
static PyObject* __Pyx_PyUnicode_Build(Py_ssize_t ulength, char* chars, int clength,
int prepend_sign, char padding_char) {
PyObject *uval;
Py_ssize_t uoffset = ulength - clength;
#if CYTHON_COMPILING_IN_CPYTHON #if CYTHON_COMPILING_IN_CPYTHON
{
int i; int i;
PyObject *uval;
#if PY_MAJOR_VERSION > 3 || PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 3 #if PY_MAJOR_VERSION > 3 || PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 3
// Py 3.3+ (post PEP-393)
void *udata; void *udata;
uval = PyUnicode_New(length, 127); uval = PyUnicode_New(ulength, 127);
if (unlikely(!uval)) return NULL; if (unlikely(!uval)) return NULL;
udata = PyUnicode_DATA(uval); udata = PyUnicode_DATA(uval);
for (i=0; i<length; i++) { if (uoffset > 0) {
PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, i, dpos[i]); i = 0;
if (prepend_sign) {
PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, 0, '-');
i++;
}
for (; i < uoffset; i++) {
PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, i, padding_char);
}
}
for (i=0; i < clength; i++) {
PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, uoffset+i, chars[i]);
} }
#else #else
// Py 2.x/3.2 (pre PEP-393)
Py_UNICODE *udata; Py_UNICODE *udata;
uval = PyUnicode_FromUnicode(NULL, length); uval = PyUnicode_FromUnicode(NULL, ulength);
if (unlikely(!uval)) return NULL; if (unlikely(!uval)) return NULL;
udata = PyUnicode_AS_UNICODE(uval); udata = PyUnicode_AS_UNICODE(uval);
for (i=0; i<length; i++) { if (uoffset > 0) {
udata[i] = dpos[i]; i = 0;
if (prepend_sign) {
udata[0] = '-';
i++;
} }
#endif for (; i < uoffset; i++) {
return uval; udata[i] = padding_char;
}
}
for (i=0; i < clength; i++) {
udata[uoffset+i] = chars[i];
} }
#endif
#else #else
return PyUnicode_DecodeASCII(dpos, length, NULL); // non-CPython
{
uval = NULL;
PyObject *sign = NULL, *padding = NULL;
if (uoffset > 0) {
prepend_sign = !!prepend_sign;
if (uoffset > prepend_sign) {
padding = PyUnicode_FromOrdinal(padding_char);
if (likely(padding) && uoffset > prepend_sign + 1) {
PyObject *tmp;
PyObject *repeat = PyInt_FromSize_t(uoffset - prepend_sign);
if (unlikely(!repeat)) goto done_or_error;
tmp = PyNumber_Multiply(padding, repeat);
Py_DECREF(repeat);
Py_DECREF(padding);
padding = tmp;
}
if (unlikely(!padding)) goto done_or_error;
}
if (prepend_sign) {
sign = PyUnicode_FromOrdinal('-');
if (unlikely(!sign)) goto done_or_error;
}
}
uval = PyUnicode_DecodeASCII(chars, clength, NULL);
if (likely(uval) && padding) {
PyObject *tmp = PyNumber_Add(padding, uval);
Py_DECREF(uval);
uval = tmp;
}
if (likely(uval) && sign) {
PyObject *tmp = PyNumber_Add(sign, uval);
Py_DECREF(uval);
uval = tmp;
}
done_or_error:
Py_XDECREF(padding);
Py_XDECREF(sign);
}
#endif #endif
return uval;
} }
......
...@@ -5,6 +5,10 @@ ...@@ -5,6 +5,10 @@
# Cython specific PEP 498 tests in addition to test_fstring.pyx from CPython # Cython specific PEP 498 tests in addition to test_fstring.pyx from CPython
#### ####
import sys
IS_PYPY = hasattr(sys, 'pypy_version_info')
def format2(ab, cd): def format2(ab, cd):
""" """
>>> a, b, c = format2(1, 2) >>> a, b, c = format2(1, 2)
...@@ -28,7 +32,7 @@ def format2(ab, cd): ...@@ -28,7 +32,7 @@ def format2(ab, cd):
b = f"{ab}cd" b = f"{ab}cd"
assert isinstance(b, unicode), type(b) assert isinstance(b, unicode), type(b)
c = f"{ab}{cd}" c = f"{ab}{cd}"
assert isinstance(c, unicode), type(c) assert isinstance(c, unicode) or (IS_PYPY and isinstance(c, str)), type(c)
return a, b, c return a, b, c
...@@ -42,7 +46,7 @@ def format_c_numbers(signed char c, short s, int n, long l, float f, double d): ...@@ -42,7 +46,7 @@ def format_c_numbers(signed char c, short s, int n, long l, float f, double d):
>>> print(s3) >>> print(s3)
12f 12f
>>> print(s4) >>> print(s4)
C 3.14 0C00C 3.14
>>> s1, s2, s3, s4 = format_c_numbers(-123, -135, -12, -12312312, -2.3456, -3.1415926) >>> s1, s2, s3, s4 = format_c_numbers(-123, -135, -12, -12312312, -2.3456, -3.1415926)
>>> print(s1) >>> print(s1)
...@@ -52,7 +56,7 @@ def format_c_numbers(signed char c, short s, int n, long l, float f, double d): ...@@ -52,7 +56,7 @@ def format_c_numbers(signed char c, short s, int n, long l, float f, double d):
>>> print(s3) >>> print(s3)
-12f -12f
>>> print(s4) >>> print(s4)
-C-3.14 -C-0C-3.14
""" """
s1 = f"{c}{s:4}{l}{n}{f:.3}" s1 = f"{c}{s:4}{l}{n}{f:.3}"
...@@ -61,7 +65,7 @@ def format_c_numbers(signed char c, short s, int n, long l, float f, double d): ...@@ -61,7 +65,7 @@ def format_c_numbers(signed char c, short s, int n, long l, float f, double d):
assert isinstance(s2, unicode), type(s2) assert isinstance(s2, unicode), type(s2)
s3 = f"{n:-4}f" s3 = f"{n:-4}f"
assert isinstance(s3, unicode), type(s3) assert isinstance(s3, unicode), type(s3)
s4 = f"{n:X}{d:5.3}" s4 = f"{n:02X}{n:03X}{d:5.3}"
assert isinstance(s4, unicode), type(s4) assert isinstance(s4, unicode), type(s4)
return s1, s2, s3, s4 return s1, s2, s3, s4
...@@ -163,9 +167,9 @@ def format_str(str s1, str s2): ...@@ -163,9 +167,9 @@ def format_str(str s1, str s2):
sabcuxyz sabcuxyz
""" """
a = f"{s1}{s2}" a = f"{s1}{s2}"
assert isinstance(a, unicode), type(a) assert isinstance(a, unicode) or (IS_PYPY and isinstance(a, str)), type(a)
b = f"{s2}{s1}" b = f"{s2}{s1}"
assert isinstance(b, unicode), type(b) assert isinstance(b, unicode) or (IS_PYPY and isinstance(a, str)), type(b)
c = f"u{s2}s{s1}" c = f"u{s2}s{s1}"
assert isinstance(c, unicode), type(c) assert isinstance(c, unicode), type(c)
d = f"s{s1}u{s2}" d = f"s{s1}u{s2}"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment