Commit 45055643 authored by Stefan Behnel

optimise C integer formatting

parent 9e37fb79
...@@ -3012,10 +3012,13 @@ class FormattedValueNode(ExprNode): ...@@ -3012,10 +3012,13 @@ class FormattedValueNode(ExprNode):
# value ExprNode The expression itself # value ExprNode The expression itself
# conversion_char str or None Type conversion (!s, !r, !a, or none) # conversion_char str or None Type conversion (!s, !r, !a, or none)
# format_spec JoinedStrNode or None Format string passed to __format__ # format_spec JoinedStrNode or None Format string passed to __format__
# c_format_spec str or None Formatting that can be done at the C level
subexprs = ['value', 'format_spec'] subexprs = ['value', 'format_spec']
type = unicode_type type = unicode_type
is_temp = True is_temp = True
c_format_spec = None
find_conversion_func = { find_conversion_func = {
's': 'PyObject_Str', 's': 'PyObject_Str',
...@@ -3029,14 +3032,31 @@ class FormattedValueNode(ExprNode): ...@@ -3029,14 +3032,31 @@ class FormattedValueNode(ExprNode):
def analyse_types(self, env): def analyse_types(self, env):
self.value = self.value.analyse_types(env) self.value = self.value.analyse_types(env)
if not self.format_spec or self.format_spec.is_string_literal:
c_format_spec = self.format_spec.value if self.format_spec else None
if self.value.type.can_coerce_to_pystring(env, format_spec=c_format_spec):
if c_format_spec is None and self.value.type.is_int:
c_format_spec = 'd'
self.c_format_spec = c_format_spec
if self.format_spec: if self.format_spec:
self.format_spec = self.format_spec.analyse_types(env).coerce_to_pyobject(env) self.format_spec = self.format_spec.analyse_types(env).coerce_to_pyobject(env)
elif not self.conversion_char and self.value.type.can_coerce_to_pyunicode(env): if not self.c_format_spec:
return FormattedCValueNode(self.pos, value=self.value) self.value = self.value.coerce_to_pyobject(env)
self.value = self.value.coerce_to_pyobject(env)
return self return self
def generate_result_code(self, code): def generate_result_code(self, code):
if self.c_format_spec and not self.value.type.is_pyobject:
convert_func = self.value.type.to_pystring_function(code)
code.putln("%s = %s(%s, '%s'); %s" % (
self.result(),
convert_func,
self.value.result(),
self.c_format_spec,
code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.py_result())
return
value_result = self.value.py_result() value_result = self.value.py_result()
if self.format_spec: if self.format_spec:
format_func = '__Pyx_PyObject_Format' format_func = '__Pyx_PyObject_Format'
...@@ -3067,20 +3087,6 @@ class FormattedValueNode(ExprNode): ...@@ -3067,20 +3087,6 @@ class FormattedValueNode(ExprNode):
code.put_gotref(self.py_result()) code.put_gotref(self.py_result())
class FormattedCValueNode(FormattedValueNode):
conversion_char = None
format_spec = None
def generate_result_code(self, code):
convert_func = self.value.type.to_pyunicode_utility_code(code)
code.putln("%s = %s(%s); %s" % (
self.result(),
convert_func,
self.value.result(),
code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.py_result())
#------------------------------------------------------------------- #-------------------------------------------------------------------
# #
# Parallel nodes (cython.parallel.thread(savailable|id)) # Parallel nodes (cython.parallel.thread(savailable|id))
......
...@@ -30,7 +30,7 @@ class BaseType(object): ...@@ -30,7 +30,7 @@ class BaseType(object):
def can_coerce_to_pyobject(self, env): def can_coerce_to_pyobject(self, env):
return False return False
def can_coerce_to_pyunicode(self, env): def can_coerce_to_pystring(self, env, format_spec=None):
return False return False
def cast_code(self, expr_code): def cast_code(self, expr_code):
...@@ -1629,10 +1629,12 @@ class CIntType(CNumericType): ...@@ -1629,10 +1629,12 @@ class CIntType(CNumericType):
def can_coerce_to_pyobject(self, env): def can_coerce_to_pyobject(self, env):
return True return True
def can_coerce_to_pyunicode(self, env): def can_coerce_to_pystring(self, env, format_spec=None):
if format_spec and format_spec not in ('o', 'd', 'x', 'X'):
return False
return True return True
def to_pyunicode_utility_code(self, code): def to_pystring_function(self, code):
if self.to_pyunicode_utility is None: if self.to_pyunicode_utility is None:
utility_code_name = "__Pyx_PyUnicode_From_" + self.specialization_name() utility_code_name = "__Pyx_PyUnicode_From_" + self.specialization_name()
to_pyunicode_utility = TempitaUtilityCode.load_cached( to_pyunicode_utility = TempitaUtilityCode.load_cached(
...@@ -1752,7 +1754,7 @@ class CReturnCodeType(CIntType): ...@@ -1752,7 +1754,7 @@ class CReturnCodeType(CIntType):
is_returncode = True is_returncode = True
exception_check = False exception_check = False
def can_coerce_to_pyunicode(self, env): def can_coerce_to_pystring(self, env, format_spec=None):
return False return False
...@@ -1762,7 +1764,7 @@ class CBIntType(CIntType): ...@@ -1762,7 +1764,7 @@ class CBIntType(CIntType):
from_py_function = "__Pyx_PyObject_IsTrue" from_py_function = "__Pyx_PyObject_IsTrue"
exception_check = 1 # for C++ bool exception_check = 1 # for C++ bool
def can_coerce_to_pyunicode(self, env): def can_coerce_to_pystring(self, env, format_spec=None):
return False return False
def declaration_code(self, entity_code, def declaration_code(self, entity_code,
...@@ -1799,7 +1801,7 @@ class CPyUCS4IntType(CIntType): ...@@ -1799,7 +1801,7 @@ class CPyUCS4IntType(CIntType):
to_py_function = "PyUnicode_FromOrdinal" to_py_function = "PyUnicode_FromOrdinal"
from_py_function = "__Pyx_PyObject_AsPy_UCS4" from_py_function = "__Pyx_PyObject_AsPy_UCS4"
def can_coerce_to_pyunicode(self, env): def can_coerce_to_pystring(self, env, format_spec=None):
return False return False
def create_from_py_utility_code(self, env): def create_from_py_utility_code(self, env):
...@@ -1823,7 +1825,7 @@ class CPyUnicodeIntType(CIntType): ...@@ -1823,7 +1825,7 @@ class CPyUnicodeIntType(CIntType):
to_py_function = "PyUnicode_FromOrdinal" to_py_function = "PyUnicode_FromOrdinal"
from_py_function = "__Pyx_PyObject_AsPy_UNICODE" from_py_function = "__Pyx_PyObject_AsPy_UNICODE"
def can_coerce_to_pyunicode(self, env): def can_coerce_to_pystring(self, env, format_spec=None):
return False return False
def create_from_py_utility_code(self, env): def create_from_py_utility_code(self, env):
......
...@@ -570,51 +570,87 @@ static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value) { ...@@ -570,51 +570,87 @@ static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value) {
/////////////// CIntToPyUnicode.proto /////////////// /////////////// CIntToPyUnicode.proto ///////////////
static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value); static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value, char format_char);
/////////////// CIntToPyUnicode /////////////// /////////////// CIntToPyUnicode ///////////////
static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value) { // NOTE: inlining because "format_char" is always a constant, which collapses lots of code below
// simple and conservative string allocation on the stack
static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value, char format_char) {
// simple and conservative C string allocation on the stack: each byte gives at most 3 digits, plus sign
char digits[sizeof({{TYPE}})*3+2]; char digits[sizeof({{TYPE}})*3+2];
int length = -1; // dpos points to end of digits array + 1 at the beginning to allow for pre-decrement looping
char *dpos = digits + sizeof({{TYPE}})*3+2;
int length;
PyObject *uval;
{{TYPE}} remaining;
const {{TYPE}} neg_one = ({{TYPE}}) -1, const_zero = ({{TYPE}}) 0; const {{TYPE}} neg_one = ({{TYPE}}) -1, const_zero = ({{TYPE}}) 0;
const int is_unsigned = neg_one > const_zero; const int is_unsigned = neg_one > const_zero;
if (is_unsigned) { // single character unicode strings are cached in CPython => use PyUnicode_FromOrdinal() for them
if (sizeof({{TYPE}}) <= sizeof(unsigned int)) { if (unlikely((is_unsigned || value >= const_zero) && (
length = sprintf(digits, "%u", (unsigned int) value); (format_char == 'o') ? value <= 7 : (format_char == 'd') ? value <= 9 : value <= 15))) {
} else if (sizeof({{TYPE}}) <= sizeof(unsigned long)) { return PyUnicode_FromOrdinal(
length = sprintf(digits, "%lu", (unsigned long) value); ((int) value) + (((int) value) <= 9 ? '0' : (format_char == 'x' ? 'a' : 'A') - 10));
} else if (sizeof({{TYPE}}) <= sizeof(unsigned PY_LONG_LONG)) { }
length = sprintf(digits, "%llu", (unsigned PY_LONG_LONG) value);
} // surprise: even trivial sprintf() calls don't get optimised in gcc (4.8)
} else { remaining = value;
if (sizeof({{TYPE}}) <= sizeof(int)) { length = 0;
length = sprintf(digits, "%d", (int) value); while (remaining != 0) {
} else if (sizeof({{TYPE}}) <= sizeof(long)) { char digit;
length = sprintf(digits, "%ld", (long) value); switch (format_char) {
} else if (sizeof({{TYPE}}) <= sizeof(PY_LONG_LONG)) { case 'o':
length = sprintf(digits, "%lld", (PY_LONG_LONG) value); digit = '0' + abs(remaining % 8);
remaining = remaining / 8;
break;
case 'd':
digit = '0' + abs(remaining % 10);
remaining = remaining / 10;
break;
case 'x':
case 'X':
digit = '0' + abs(remaining % 16);
remaining = remaining / 16;
if (digit > '9')
digit = digit - '9' - 1 + (format_char == 'x' ? 'a' : 'A');
break;
default:
assert(0);
} }
*(--dpos) = digit;
++length;
} }
if (unlikely(length < 0)) { if (!is_unsigned && value <= neg_one) {
// huge integer type or (unlikely) error in sprintf() => use slow conversion *(--dpos) = '-';
PyObject *pylong, *uval = NULL; ++length;
int one = 1; int little = (int)*(unsigned char *)&one; }
unsigned char *bytes = (unsigned char *)&value;
pylong = _PyLong_FromByteArray(bytes, sizeof({{TYPE}}), little, !is_unsigned); #if CYTHON_COMPILING_IN_CPYTHON
if (likely(pylong)) { {
#if PY_MAJOR_VERSION >= 3 int i;
uval = PyObject_Str(pylong); #if PY_MAJOR_VERSION > 3 || PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 3
void *udata;
uval = PyUnicode_New(length, 127);
if (unlikely(!uval)) return NULL;
udata = PyUnicode_DATA(uval);
for (i=0; i<length; i++) {
PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, i, dpos[i]);
}
#else #else
uval = PyObject_Unicode(pylong); Py_UNICODE *udata;
#endif uval = PyUnicode_FromUnicode(NULL, length);
Py_DECREF(pylong); if (unlikely(!uval)) return uval;
udata = PyUnicode_AS_UNICODE(uval);
for (i=0; i<length; i++) {
udata[i] = dpos[i];
} }
#endif
return uval; return uval;
} }
return PyUnicode_DecodeASCII(digits, length, NULL); #else
return PyUnicode_DecodeASCII(dpos, length, NULL);
#endif
} }
......
...@@ -32,7 +32,7 @@ def format2(ab, cd): ...@@ -32,7 +32,7 @@ def format2(ab, cd):
return a, b, c return a, b, c
def format_c_numbers(char c, short s, int n, long l, float f, double d): def format_c_numbers(signed char c, short s, int n, long l, float f, double d):
""" """
>>> s1, s2, s3, s4 = format_c_numbers(123, 135, 12, 12312312, 2.3456, 3.1415926) >>> s1, s2, s3, s4 = format_c_numbers(123, 135, 12, 12312312, 2.3456, 3.1415926)
>>> print(s1) >>> print(s1)
...@@ -44,6 +44,16 @@ def format_c_numbers(char c, short s, int n, long l, float f, double d): ...@@ -44,6 +44,16 @@ def format_c_numbers(char c, short s, int n, long l, float f, double d):
>>> print(s4) >>> print(s4)
C 3.14 C 3.14
>>> s1, s2, s3, s4 = format_c_numbers(-123, -135, -12, -12312312, -2.3456, -3.1415926)
>>> print(s1)
-123-135-12312312-12-2.35
>>> print(s2)
-3.14-2.3
>>> print(s3)
-12f
>>> print(s4)
-C-3.14
""" """
s1 = f"{c}{s:4}{l}{n}{f:.3}" s1 = f"{c}{s:4}{l}{n}{f:.3}"
assert isinstance(s1, unicode), type(s1) assert isinstance(s1, unicode), type(s1)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment