Commit 45055643 authored by Stefan Behnel

optimise C integer formatting

parent 9e37fb79
......@@ -3012,10 +3012,13 @@ class FormattedValueNode(ExprNode):
# value ExprNode The expression itself
# conversion_char str or None Type conversion (!s, !r, !a, or none)
# format_spec JoinedStrNode or None Format string passed to __format__
# c_format_spec str or None Formatting that can be done at the C level
subexprs = ['value', 'format_spec']
type = unicode_type
is_temp = True
c_format_spec = None
find_conversion_func = {
's': 'PyObject_Str',
......@@ -3029,14 +3032,31 @@ class FormattedValueNode(ExprNode):
def analyse_types(self, env):
self.value = self.value.analyse_types(env)
if not self.format_spec or self.format_spec.is_string_literal:
c_format_spec = self.format_spec.value if self.format_spec else None
if self.value.type.can_coerce_to_pystring(env, format_spec=c_format_spec):
if c_format_spec is None and self.value.type.is_int:
c_format_spec = 'd'
self.c_format_spec = c_format_spec
if self.format_spec:
self.format_spec = self.format_spec.analyse_types(env).coerce_to_pyobject(env)
elif not self.conversion_char and self.value.type.can_coerce_to_pyunicode(env):
return FormattedCValueNode(self.pos, value=self.value)
self.value = self.value.coerce_to_pyobject(env)
if not self.c_format_spec:
self.value = self.value.coerce_to_pyobject(env)
return self
def generate_result_code(self, code):
if self.c_format_spec and not self.value.type.is_pyobject:
convert_func = self.value.type.to_pystring_function(code)
code.putln("%s = %s(%s, '%s'); %s" % (
self.result(),
convert_func,
self.value.result(),
self.c_format_spec,
code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.py_result())
return
value_result = self.value.py_result()
if self.format_spec:
format_func = '__Pyx_PyObject_Format'
......@@ -3067,20 +3087,6 @@ class FormattedValueNode(ExprNode):
code.put_gotref(self.py_result())
class FormattedCValueNode(FormattedValueNode):
conversion_char = None
format_spec = None
def generate_result_code(self, code):
convert_func = self.value.type.to_pyunicode_utility_code(code)
code.putln("%s = %s(%s); %s" % (
self.result(),
convert_func,
self.value.result(),
code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.py_result())
#-------------------------------------------------------------------
#
# Parallel nodes (cython.parallel.thread(savailable|id))
......
......@@ -30,7 +30,7 @@ class BaseType(object):
def can_coerce_to_pyobject(self, env):
return False
def can_coerce_to_pyunicode(self, env):
def can_coerce_to_pystring(self, env, format_spec=None):
return False
def cast_code(self, expr_code):
......@@ -1629,10 +1629,12 @@ class CIntType(CNumericType):
def can_coerce_to_pyobject(self, env):
return True
def can_coerce_to_pyunicode(self, env):
def can_coerce_to_pystring(self, env, format_spec=None):
if format_spec and format_spec not in ('o', 'd', 'x', 'X'):
return False
return True
def to_pyunicode_utility_code(self, code):
def to_pystring_function(self, code):
if self.to_pyunicode_utility is None:
utility_code_name = "__Pyx_PyUnicode_From_" + self.specialization_name()
to_pyunicode_utility = TempitaUtilityCode.load_cached(
......@@ -1752,7 +1754,7 @@ class CReturnCodeType(CIntType):
is_returncode = True
exception_check = False
def can_coerce_to_pyunicode(self, env):
def can_coerce_to_pystring(self, env, format_spec=None):
return False
......@@ -1762,7 +1764,7 @@ class CBIntType(CIntType):
from_py_function = "__Pyx_PyObject_IsTrue"
exception_check = 1 # for C++ bool
def can_coerce_to_pyunicode(self, env):
def can_coerce_to_pystring(self, env, format_spec=None):
return False
def declaration_code(self, entity_code,
......@@ -1799,7 +1801,7 @@ class CPyUCS4IntType(CIntType):
to_py_function = "PyUnicode_FromOrdinal"
from_py_function = "__Pyx_PyObject_AsPy_UCS4"
def can_coerce_to_pyunicode(self, env):
def can_coerce_to_pystring(self, env, format_spec=None):
return False
def create_from_py_utility_code(self, env):
......@@ -1823,7 +1825,7 @@ class CPyUnicodeIntType(CIntType):
to_py_function = "PyUnicode_FromOrdinal"
from_py_function = "__Pyx_PyObject_AsPy_UNICODE"
def can_coerce_to_pyunicode(self, env):
def can_coerce_to_pystring(self, env, format_spec=None):
return False
def create_from_py_utility_code(self, env):
......
......@@ -570,51 +570,87 @@ static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value) {
/////////////// CIntToPyUnicode.proto ///////////////
static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value);
static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value, char format_char);
/////////////// CIntToPyUnicode ///////////////
static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value) {
// simple and conservative string allocation on the stack
// NOTE: inlining because "format_char" is always a constant, which collapses lots of code below
static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value, char format_char) {
// simple and conservative C string allocation on the stack: each byte gives at most 3 digits, plus sign
char digits[sizeof({{TYPE}})*3+2];
int length = -1;
// dpos points to end of digits array + 1 at the beginning to allow for pre-decrement looping
char *dpos = digits + sizeof({{TYPE}})*3+2;
int length;
PyObject *uval;
{{TYPE}} remaining;
const {{TYPE}} neg_one = ({{TYPE}}) -1, const_zero = ({{TYPE}}) 0;
const int is_unsigned = neg_one > const_zero;
if (is_unsigned) {
if (sizeof({{TYPE}}) <= sizeof(unsigned int)) {
length = sprintf(digits, "%u", (unsigned int) value);
} else if (sizeof({{TYPE}}) <= sizeof(unsigned long)) {
length = sprintf(digits, "%lu", (unsigned long) value);
} else if (sizeof({{TYPE}}) <= sizeof(unsigned PY_LONG_LONG)) {
length = sprintf(digits, "%llu", (unsigned PY_LONG_LONG) value);
}
} else {
if (sizeof({{TYPE}}) <= sizeof(int)) {
length = sprintf(digits, "%d", (int) value);
} else if (sizeof({{TYPE}}) <= sizeof(long)) {
length = sprintf(digits, "%ld", (long) value);
} else if (sizeof({{TYPE}}) <= sizeof(PY_LONG_LONG)) {
length = sprintf(digits, "%lld", (PY_LONG_LONG) value);
// single character unicode strings are cached in CPython => use PyUnicode_FromOrdinal() for them
if (unlikely((is_unsigned || value >= const_zero) && (
(format_char == 'o') ? value <= 7 : (format_char == 'd') ? value <= 9 : value <= 15))) {
return PyUnicode_FromOrdinal(
((int) value) + (((int) value) <= 9 ? '0' : (format_char == 'x' ? 'a' : 'A') - 10));
}
// surprise: even trivial sprintf() calls don't get optimised in gcc (4.8)
remaining = value;
length = 0;
while (remaining != 0) {
char digit;
switch (format_char) {
case 'o':
digit = '0' + abs(remaining % 8);
remaining = remaining / 8;
break;
case 'd':
digit = '0' + abs(remaining % 10);
remaining = remaining / 10;
break;
case 'x':
case 'X':
digit = '0' + abs(remaining % 16);
remaining = remaining / 16;
if (digit > '9')
digit = digit - '9' - 1 + (format_char == 'x' ? 'a' : 'A');
break;
default:
assert(0);
}
*(--dpos) = digit;
++length;
}
if (unlikely(length < 0)) {
// huge integer type or (unlikely) error in sprintf() => use slow conversion
PyObject *pylong, *uval = NULL;
int one = 1; int little = (int)*(unsigned char *)&one;
unsigned char *bytes = (unsigned char *)&value;
pylong = _PyLong_FromByteArray(bytes, sizeof({{TYPE}}), little, !is_unsigned);
if (likely(pylong)) {
#if PY_MAJOR_VERSION >= 3
uval = PyObject_Str(pylong);
if (!is_unsigned && value <= neg_one) {
*(--dpos) = '-';
++length;
}
#if CYTHON_COMPILING_IN_CPYTHON
{
int i;
#if PY_MAJOR_VERSION > 3 || PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 3
void *udata;
uval = PyUnicode_New(length, 127);
if (unlikely(!uval)) return NULL;
udata = PyUnicode_DATA(uval);
for (i=0; i<length; i++) {
PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, i, dpos[i]);
}
#else
uval = PyObject_Unicode(pylong);
#endif
Py_DECREF(pylong);
Py_UNICODE *udata;
uval = PyUnicode_FromUnicode(NULL, length);
if (unlikely(!uval)) return uval;
udata = PyUnicode_AS_UNICODE(uval);
for (i=0; i<length; i++) {
udata[i] = dpos[i];
}
#endif
return uval;
}
return PyUnicode_DecodeASCII(digits, length, NULL);
#else
return PyUnicode_DecodeASCII(dpos, length, NULL);
#endif
}
......
......@@ -32,7 +32,7 @@ def format2(ab, cd):
return a, b, c
def format_c_numbers(char c, short s, int n, long l, float f, double d):
def format_c_numbers(signed char c, short s, int n, long l, float f, double d):
"""
>>> s1, s2, s3, s4 = format_c_numbers(123, 135, 12, 12312312, 2.3456, 3.1415926)
>>> print(s1)
......@@ -44,6 +44,16 @@ def format_c_numbers(char c, short s, int n, long l, float f, double d):
>>> print(s4)
C 3.14
>>> s1, s2, s3, s4 = format_c_numbers(-123, -135, -12, -12312312, -2.3456, -3.1415926)
>>> print(s1)
-123-135-12312312-12-2.35
>>> print(s2)
-3.14-2.3
>>> print(s3)
-12f
>>> print(s4)
-C-3.14
"""
s1 = f"{c}{s:4}{l}{n}{f:.3}"
assert isinstance(s1, unicode), type(s1)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment