Commit 20b64df5 authored by Stefan Behnel

reimplement ord() to make its (C long) return value and input argument type...

reimplement ord() to make its (C long) return value and input argument type (bytes/unicode/char/unichar) available to the type analysis and avoid redundant round-trips into Python space
parent 4d12bec3
...@@ -165,7 +165,26 @@ builtin_function_table = [ ...@@ -165,7 +165,26 @@ builtin_function_table = [
utility_code = iter_next_utility_code), # not available in Py2 => implemented here utility_code = iter_next_utility_code), # not available in Py2 => implemented here
#('oct', "", "", ""), #('oct', "", "", ""),
#('open', "ss", "O", "PyFile_FromString"), # not in Py3 #('open', "ss", "O", "PyFile_FromString"), # not in Py3
#('ord', "", "", ""), ] + [
BuiltinFunction('ord', None, None, "__Pyx_long_cast",
func_type=PyrexTypes.CFuncType(
PyrexTypes.c_long_type, [PyrexTypes.CFuncTypeArg("c", c_type, None)],
is_strict_signature=True))
for c_type in [PyrexTypes.c_py_ucs4_type, PyrexTypes.c_py_unicode_type]
] + [
BuiltinFunction('ord', None, None, "__Pyx_uchar_cast",
func_type=PyrexTypes.CFuncType(
PyrexTypes.c_uchar_type, [PyrexTypes.CFuncTypeArg("c", c_type, None)],
is_strict_signature=True))
for c_type in [PyrexTypes.c_char_type, PyrexTypes.c_schar_type, PyrexTypes.c_uchar_type]
] + [
BuiltinFunction('ord', None, None, "__Pyx_PyObject_Ord",
utility_code=UtilityCode.load_cached("object_ord", "Builtins.c"),
func_type=PyrexTypes.CFuncType(
PyrexTypes.c_long_type, [
PyrexTypes.CFuncTypeArg("c", PyrexTypes.py_object_type, None)
],
exception_value="(long)(Py_UCS4)-1")),
BuiltinFunction('pow', "OOO", "O", "PyNumber_Power"), BuiltinFunction('pow', "OOO", "O", "PyNumber_Power"),
BuiltinFunction('pow', "OO", "O", "__Pyx_PyNumber_Power2", BuiltinFunction('pow', "OO", "O", "__Pyx_PyNumber_Power2",
utility_code = UtilityCode.load("pow2", "Builtins.c")), utility_code = UtilityCode.load("pow2", "Builtins.c")),
......
...@@ -1422,6 +1422,31 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform): ...@@ -1422,6 +1422,31 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
stop=stop, stop=stop,
step=step or ExprNodes.NoneNode(node.pos)) step=step or ExprNodes.NoneNode(node.pos))
def _handle_simple_function_ord(self, node, pos_args):
    """Fold ord('X') on a one-character literal into a C integer constant.

    node     -- the ord(...) call node being visited
    pos_args -- list of positional argument nodes of that call

    Returns an ExprNodes.IntNode typed as C long that carries the ordinal
    of the literal character, or 'node' unchanged when the call does not
    have exactly one argument or the argument is not a foldable literal.
    """
    if len(pos_args) != 1:
        return node
    arg = pos_args[0]

    # Pick the single character to fold, if the literal qualifies.
    char = None
    if isinstance(arg, (ExprNodes.UnicodeNode, ExprNodes.BytesNode)):
        if len(arg.value) == 1:
            char = arg.value
    elif isinstance(arg, ExprNodes.StringNode):
        # Py2/3 portability: only ordinals up to 255 are folded for this
        # literal kind.
        # NOTE(review): presumably because such a literal can be a byte
        # string or a unicode string depending on the target -- confirm.
        if (arg.unicode_value and len(arg.unicode_value) == 1
                and ord(arg.unicode_value) <= 255):
            char = arg.unicode_value
    if char is None:
        return node

    # Every literal kind yields the same C long constant type, so the
    # folded result does not depend on which literal syntax was used.
    ordinal = ord(char)
    return ExprNodes.IntNode(
        arg.pos, type=PyrexTypes.c_long_type,
        value=str(ordinal),
        constant_result=ordinal)
# sequence processing
class YieldNodeCollector(Visitor.TreeVisitor): class YieldNodeCollector(Visitor.TreeVisitor):
def __init__(self): def __init__(self):
Visitor.TreeVisitor.__init__(self) Visitor.TreeVisitor.__init__(self)
...@@ -1632,7 +1657,7 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform): ...@@ -1632,7 +1657,7 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
yield_expression, yield_stat_node = self._find_single_yield_expression(loop_node) yield_expression, yield_stat_node = self._find_single_yield_expression(loop_node)
if yield_expression is None: if yield_expression is None:
return node return node
else: # ComprehensionNode else: # ComprehensionNode
yield_stat_node = gen_expr_node.append yield_stat_node = gen_expr_node.append
yield_expression = yield_stat_node.expr yield_expression = yield_stat_node.expr
try: try:
...@@ -1712,6 +1737,8 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform): ...@@ -1712,6 +1737,8 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
return last_result return last_result
# builtin type creation
def _DISABLED_handle_simple_function_tuple(self, node, pos_args): def _DISABLED_handle_simple_function_tuple(self, node, pos_args):
if not pos_args: if not pos_args:
return ExprNodes.TupleNode(node.pos, args=[], constant_result=()) return ExprNodes.TupleNode(node.pos, args=[], constant_result=())
...@@ -2452,7 +2479,7 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, ...@@ -2452,7 +2479,7 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
if isinstance(arg, ExprNodes.CoerceToPyTypeNode): if isinstance(arg, ExprNodes.CoerceToPyTypeNode):
if arg.arg.type.is_unicode_char: if arg.arg.type.is_unicode_char:
return ExprNodes.TypecastNode( return ExprNodes.TypecastNode(
arg.pos, operand=arg.arg, type=PyrexTypes.c_int_type arg.pos, operand=arg.arg, type=PyrexTypes.c_long_type
).coerce_to(node.type, self.current_env()) ).coerce_to(node.type, self.current_env())
elif isinstance(arg, ExprNodes.UnicodeNode): elif isinstance(arg, ExprNodes.UnicodeNode):
if len(arg.value) == 1: if len(arg.value) == 1:
......
...@@ -239,10 +239,53 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_abs_longlong(PY_LONG_LONG x) { ...@@ -239,10 +239,53 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_abs_longlong(PY_LONG_LONG x) {
#endif #endif
} }
//////////////////// pow2.proto //////////////////// //////////////////// pow2.proto ////////////////////
#define __Pyx_PyNumber_Power2(a, b) PyNumber_Power(a, b, Py_None) #define __Pyx_PyNumber_Power2(a, b) PyNumber_Power(a, b, Py_None)
//////////////////// object_ord.proto ////////////////////
//@requires: TypeConversion.c::UnicodeAsUCS4
// __Pyx_PyObject_Ord(c): ord() for an arbitrary Python object, yielding a C long.
// On Python 3, unicode objects are dispatched directly to
// __Pyx_PyUnicode_AsPy_UCS4(); every other object goes through the
// out-of-line __Pyx__PyObject_Ord().  On Python 2, all objects go through
// __Pyx__PyObject_Ord().
// Note that the Python 3 variant expands its argument more than once.
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyObject_Ord(c) \
(likely(PyUnicode_Check(c)) ? (long)__Pyx_PyUnicode_AsPy_UCS4(c) : __Pyx__PyObject_Ord(c))
#else
#define __Pyx_PyObject_Ord(c) __Pyx__PyObject_Ord(c)
#endif
// Slow path of __Pyx_PyObject_Ord(), defined in the "object_ord" section.
static long __Pyx__PyObject_Ord(PyObject* c); /*proto*/
//////////////////// object_ord ////////////////////
// Out-of-line implementation of ord() for Python objects.
// Accepts bytes and bytearray objects of length 1 (and, on Python 2, unicode
// objects via __Pyx_PyUnicode_AsPy_UCS4()).  Returns the ordinal as a C long.
// On failure, sets a TypeError (or whatever the unicode helper raised) and
// returns (long)(Py_UCS4)-1.
static long __Pyx__PyObject_Ord(PyObject* c) {
    Py_ssize_t length;

    if (PyBytes_Check(c)) {
        length = PyBytes_GET_SIZE(c);
        if (likely(length == 1)) {
            return (unsigned char) PyBytes_AS_STRING(c)[0];
        }
        goto bad_length;
    }
#if PY_MAJOR_VERSION < 3
    // On Python 3 the calling macro handles unicode objects before we get here.
    if (PyUnicode_Check(c)) {
        return (long)__Pyx_PyUnicode_AsPy_UCS4(c);
    }
#endif
    if (PyByteArray_Check(c)) {
        length = PyByteArray_GET_SIZE(c);
        if (likely(length == 1)) {
            return (unsigned char) PyByteArray_AS_STRING(c)[0];
        }
        goto bad_length;
    }

    // Neither a byte string nor a byte array: report the offending type.
    // FIXME: support character buffers - but CPython doesn't support them either
    PyErr_Format(PyExc_TypeError,
        "ord() expected string of length 1, but %.200s found", c->ob_type->tp_name);
    return (long)(Py_UCS4)-1;

bad_length:
    // Right type, wrong number of characters.
    PyErr_Format(PyExc_TypeError,
        "ord() expected a character, but string of length %zd found", length);
    return (long)(Py_UCS4)-1;
}
//////////////////// py_dict_keys.proto //////////////////// //////////////////// py_dict_keys.proto ////////////////////
static CYTHON_INLINE PyObject* __Pyx_PyDict_Keys(PyObject* d); /*proto*/ static CYTHON_INLINE PyObject* __Pyx_PyDict_Keys(PyObject* d); /*proto*/
......
...@@ -2,6 +2,9 @@ ...@@ -2,6 +2,9 @@
/* Type Conversion Predeclarations */ /* Type Conversion Predeclarations */
/* Plain C cast helpers (unsigned char / long).
 * The macro argument is parenthesized so that the cast applies to the whole
 * argument expression: __Pyx_long_cast(a + b) is (long)(a + b), not
 * (long)a + b. */
#define __Pyx_uchar_cast(c) ((unsigned char)(c))
#define __Pyx_long_cast(x) ((long)(x))
#define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \ #define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \
(sizeof(type) < sizeof(Py_ssize_t)) || \ (sizeof(type) < sizeof(Py_ssize_t)) || \
(sizeof(type) > sizeof(Py_ssize_t) && \ (sizeof(type) > sizeof(Py_ssize_t) && \
...@@ -391,43 +394,54 @@ bad: ...@@ -391,43 +394,54 @@ bad:
} }
/////////////// ObjectAsUCS4.proto /////////////// /////////////// UnicodeAsUCS4.proto ///////////////
static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject*); static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject*);
/////////////// ObjectAsUCS4 /////////////// /////////////// UnicodeAsUCS4 ///////////////
static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) { static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject* x) {
long ival; Py_ssize_t length;
if (PyUnicode_Check(x)) { #if CYTHON_PEP393_ENABLED
Py_ssize_t length; length = PyUnicode_GET_LENGTH(x);
#if CYTHON_PEP393_ENABLED if (likely(length == 1)) {
length = PyUnicode_GET_LENGTH(x); return PyUnicode_READ_CHAR(x, 0);
if (likely(length == 1)) { }
return PyUnicode_READ_CHAR(x, 0); #else
} length = PyUnicode_GET_SIZE(x);
#else if (likely(length == 1)) {
length = PyUnicode_GET_SIZE(x); return PyUnicode_AS_UNICODE(x)[0];
if (likely(length == 1)) { }
return PyUnicode_AS_UNICODE(x)[0]; #if Py_UNICODE_SIZE == 2
} else if (PyUnicode_GET_SIZE(x) == 2) {
#if Py_UNICODE_SIZE == 2 Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0];
else if (PyUnicode_GET_SIZE(x) == 2) { if (high_val >= 0xD800 && high_val <= 0xDBFF) {
Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0]; Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1];
if (high_val >= 0xD800 && high_val <= 0xDBFF) { if (low_val >= 0xDC00 && low_val <= 0xDFFF) {
Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1]; return 0x10000 + (((high_val & ((1<<10)-1)) << 10) | (low_val & ((1<<10)-1)));
if (low_val >= 0xDC00 && low_val <= 0xDFFF) {
return 0x10000 + (((high_val & ((1<<10)-1)) << 10) | (low_val & ((1<<10)-1)));
}
} }
} }
#endif
#endif
PyErr_Format(PyExc_ValueError,
"only single character unicode strings can be converted to Py_UCS4, "
"got length %" CYTHON_FORMAT_SSIZE_T "d", length);
return (Py_UCS4)-1;
} }
#endif
#endif
PyErr_Format(PyExc_ValueError,
"only single character unicode strings can be converted to Py_UCS4, "
"got length %" CYTHON_FORMAT_SSIZE_T "d", length);
return (Py_UCS4)-1;
}
/////////////// ObjectAsUCS4.proto ///////////////
//@requires: UnicodeAsUCS4
#define __Pyx_PyObject_AsPy_UCS4(x) \
(likely(PyUnicode_Check(x)) ? __Pyx_PyUnicode_AsPy_UCS4(x) : __Pyx__PyObject_AsPy_UCS4(x))
static Py_UCS4 __Pyx__PyObject_AsPy_UCS4(PyObject*);
/////////////// ObjectAsUCS4 ///////////////
static Py_UCS4 __Pyx__PyObject_AsPy_UCS4(PyObject* x) {
long ival;
ival = __Pyx_PyInt_As_long(x); ival = __Pyx_PyInt_As_long(x);
if (unlikely(ival < 0)) { if (unlikely(ival < 0)) {
if (!PyErr_Occurred()) if (!PyErr_Occurred())
...@@ -442,6 +456,7 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) { ...@@ -442,6 +456,7 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) {
return (Py_UCS4)ival; return (Py_UCS4)ival;
} }
/////////////// ObjectAsPyUnicode.proto /////////////// /////////////// ObjectAsPyUnicode.proto ///////////////
static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject*); static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject*);
......
cimport cython cimport cython
import sys
uspace = u' ' uspace = u' '
ustring_with_a = u'abcdefg' ustring_with_a = u'abcdefg'
ustring_without_a = u'bcdefg' ustring_without_a = u'bcdefg'
@cython.test_fail_if_path_exists('//SimpleCallNode')
@cython.test_assert_path_exists(
# ord() should receive and return a C value
'//ReturnStatNode//CoerceToPyTypeNode//SimpleCallNode')
@cython.test_fail_if_path_exists(
'//ReturnStatNode//SimpleCallNode//CoerceToPyTypeNode')
def ord_Py_UNICODE(unicode s): def ord_Py_UNICODE(unicode s):
""" """
>>> ord_Py_UNICODE(uspace) >>> ord_Py_UNICODE(uspace)
...@@ -15,17 +22,23 @@ def ord_Py_UNICODE(unicode s): ...@@ -15,17 +22,23 @@ def ord_Py_UNICODE(unicode s):
u = s[0] u = s[0]
return ord(u) return ord(u)
@cython.test_assert_path_exists('//IntNode')
@cython.test_assert_path_exists('//TupleNode//IntNode')
@cython.test_fail_if_path_exists('//SimpleCallNode') @cython.test_fail_if_path_exists('//SimpleCallNode')
def ord_const(): def ord_const():
""" """
>>> ord_const() >>> ord(b' ')
32
>>> ord(' ')
32 32
>>> ord_const()
(32, 32, 32, 255, 255, 4660, 0)
""" """
return ord(u' ') return ord(u' '), ord(b' '), ord(' '), ord('\xff'), ord(b'\xff'), ord(u'\u1234'), ord('\0')
@cython.test_assert_path_exists('//PrimaryCmpNode//IntNode') @cython.test_assert_path_exists('//PrimaryCmpNode//IntNode')
@cython.test_fail_if_path_exists('//SimpleCallNode') #@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_for_loop_ord(unicode s): def unicode_for_loop_ord(unicode s):
""" """
>>> unicode_for_loop_ord(ustring_with_a) >>> unicode_for_loop_ord(ustring_with_a)
...@@ -37,3 +50,43 @@ def unicode_for_loop_ord(unicode s): ...@@ -37,3 +50,43 @@ def unicode_for_loop_ord(unicode s):
if ord(c) == ord(u'a'): if ord(c) == ord(u'a'):
return True return True
return False return False
def compare_to_char(s):
"""
Compare ord() of an untyped Python object with a C char holding b'x'.

>>> compare_to_char(uspace)
False
>>> compare_to_char(b'a')
False
>>> compare_to_char(b'x')
True
>>> compare_to_char('x')
True
"""
# 'c' is a C-level char, so the comparison below mixes the result of ord()
# with a C character value.
cdef char c = b'x'
return ord(s) == c
def ord_object(s):
"""
Call ord() on an untyped Python object argument.

For a multi-character string, the doctest accepts ValueError on
Python 3 and TypeError on Python 2.

>>> try: ord_object('abc')
... except ValueError: assert sys.version_info[0] >= 3
... except TypeError: assert sys.version_info[0] < 3
>>> ord_object('a')
97
>>> ord_object(b'a')
97
"""
return ord(s)
def non_builtin_ord(s):
"""
Rebind the local name 'ord' to a user function and call it.

Both calls must reach _ord(), which always returns 123.

>>> non_builtin_ord('x')
(123, 123)
"""
def _ord(s):
return 123
# Shadows the builtin name inside this function; the call below must use
# this binding.
ord = _ord
return ord(s), _ord(s)
...@@ -5,6 +5,10 @@ cimport cython ...@@ -5,6 +5,10 @@ cimport cython
cdef Py_UCS4 char_ASCII = u'A' cdef Py_UCS4 char_ASCII = u'A'
cdef Py_UCS4 char_KLINGON = u'\uF8D2' cdef Py_UCS4 char_KLINGON = u'\uF8D2'
u_A = char_ASCII
u_KLINGON = char_KLINGON
def compare_ASCII(): def compare_ASCII():
""" """
>>> compare_ASCII() >>> compare_ASCII()
...@@ -86,6 +90,19 @@ def unicode_ordinal(Py_UCS4 i): ...@@ -86,6 +90,19 @@ def unicode_ordinal(Py_UCS4 i):
""" """
return i return i
def ord_py_ucs4(Py_UCS4 x):
"""
Call ord() on an argument typed as Py_UCS4.

>>> ord_py_ucs4(u0)
0
>>> ord_py_ucs4(u_A)
65
>>> ord_py_ucs4(u_KLINGON)
63698
"""
return ord(x)
@cython.test_assert_path_exists('//PythonCapiCallNode') @cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode') @cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_type_methods(Py_UCS4 uchar): def unicode_type_methods(Py_UCS4 uchar):
......
...@@ -5,6 +5,10 @@ cimport cython ...@@ -5,6 +5,10 @@ cimport cython
cdef Py_UNICODE char_ASCII = u'A' cdef Py_UNICODE char_ASCII = u'A'
cdef Py_UNICODE char_KLINGON = u'\uF8D2' cdef Py_UNICODE char_KLINGON = u'\uF8D2'
u_A = char_ASCII
u_KLINGON = char_KLINGON
def compare_ASCII(): def compare_ASCII():
""" """
>>> compare_ASCII() >>> compare_ASCII()
...@@ -78,6 +82,19 @@ def unicode_ordinal(Py_UNICODE i): ...@@ -78,6 +82,19 @@ def unicode_ordinal(Py_UNICODE i):
""" """
return i return i
def ord_pyunicode(Py_UNICODE x):
"""
Call ord() on an argument typed as Py_UNICODE.

>>> ord_pyunicode(u0)
0
>>> ord_pyunicode(u_A)
65
>>> ord_pyunicode(u_KLINGON)
63698
"""
return ord(x)
@cython.test_assert_path_exists('//PythonCapiCallNode') @cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode') @cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_type_methods(Py_UNICODE uchar): def unicode_type_methods(Py_UNICODE uchar):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment