Commit 20b64df5 authored by Stefan Behnel's avatar Stefan Behnel

reimplement ord() to make its (C long) return value and input argument type...

Reimplement ord() so that its return type (C long) and its input argument types (bytes/unicode/char/unichar) are available to type analysis, avoiding redundant round-trips into Python space.
parent 4d12bec3
......@@ -165,7 +165,26 @@ builtin_function_table = [
utility_code = iter_next_utility_code), # not available in Py2 => implemented here
#('oct', "", "", ""),
#('open', "ss", "O", "PyFile_FromString"), # not in Py3
#('ord', "", "", ""),
] + [
BuiltinFunction('ord', None, None, "__Pyx_long_cast",
func_type=PyrexTypes.CFuncType(
PyrexTypes.c_long_type, [PyrexTypes.CFuncTypeArg("c", c_type, None)],
is_strict_signature=True))
for c_type in [PyrexTypes.c_py_ucs4_type, PyrexTypes.c_py_unicode_type]
] + [
BuiltinFunction('ord', None, None, "__Pyx_uchar_cast",
func_type=PyrexTypes.CFuncType(
PyrexTypes.c_uchar_type, [PyrexTypes.CFuncTypeArg("c", c_type, None)],
is_strict_signature=True))
for c_type in [PyrexTypes.c_char_type, PyrexTypes.c_schar_type, PyrexTypes.c_uchar_type]
] + [
BuiltinFunction('ord', None, None, "__Pyx_PyObject_Ord",
utility_code=UtilityCode.load_cached("object_ord", "Builtins.c"),
func_type=PyrexTypes.CFuncType(
PyrexTypes.c_long_type, [
PyrexTypes.CFuncTypeArg("c", PyrexTypes.py_object_type, None)
],
exception_value="(long)(Py_UCS4)-1")),
BuiltinFunction('pow', "OOO", "O", "PyNumber_Power"),
BuiltinFunction('pow', "OO", "O", "__Pyx_PyNumber_Power2",
utility_code = UtilityCode.load("pow2", "Builtins.c")),
......
......@@ -1422,6 +1422,31 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
stop=stop,
step=step or ExprNodes.NoneNode(node.pos))
def _handle_simple_function_ord(self, node, pos_args):
    """Fold ord() of a single-character literal into a C integer constant.

    Args:
        node: the call node for ``ord(...)``; returned unchanged whenever
            the call cannot be folded.
        pos_args: positional argument nodes of the call.

    Returns:
        An ``ExprNodes.IntNode`` of type C long carrying the ordinal, or
        ``node`` itself if the call does not take exactly one argument or
        the argument is not a foldable one-character literal.
    """
    if len(pos_args) != 1:
        return node
    arg = pos_args[0]

    # Pick the literal character to fold, if there is one.
    char = None
    if isinstance(arg, (ExprNodes.UnicodeNode, ExprNodes.BytesNode)):
        if len(arg.value) == 1:
            char = arg.value
    elif isinstance(arg, ExprNodes.StringNode):
        # Py2/3 portability: only ordinals up to 255 are folded for plain
        # string literals.
        # NOTE(review): presumably because such a literal is bytes under
        # Py2 and text under Py3 - confirm.
        if arg.unicode_value and len(arg.unicode_value) == 1 \
                and ord(arg.unicode_value) <= 255:
            char = arg.unicode_value
    if char is None:
        return node

    ordinal = ord(char)
    # Always use C long: it is the type used by the other literal branch
    # and by the object-argument ord() signature, so every folded constant
    # is typed the same way.
    return ExprNodes.IntNode(
        arg.pos, type=PyrexTypes.c_long_type,
        value=str(ordinal),
        constant_result=ordinal)
# sequence processing
class YieldNodeCollector(Visitor.TreeVisitor):
def __init__(self):
Visitor.TreeVisitor.__init__(self)
......@@ -1632,7 +1657,7 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
yield_expression, yield_stat_node = self._find_single_yield_expression(loop_node)
if yield_expression is None:
return node
else: # ComprehensionNode
else: # ComprehensionNode
yield_stat_node = gen_expr_node.append
yield_expression = yield_stat_node.expr
try:
......@@ -1712,6 +1737,8 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
return last_result
# builtin type creation
def _DISABLED_handle_simple_function_tuple(self, node, pos_args):
if not pos_args:
return ExprNodes.TupleNode(node.pos, args=[], constant_result=())
......@@ -2452,7 +2479,7 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
if isinstance(arg, ExprNodes.CoerceToPyTypeNode):
if arg.arg.type.is_unicode_char:
return ExprNodes.TypecastNode(
arg.pos, operand=arg.arg, type=PyrexTypes.c_int_type
arg.pos, operand=arg.arg, type=PyrexTypes.c_long_type
).coerce_to(node.type, self.current_env())
elif isinstance(arg, ExprNodes.UnicodeNode):
if len(arg.value) == 1:
......
......@@ -239,10 +239,53 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_abs_longlong(PY_LONG_LONG x) {
#endif
}
//////////////////// pow2.proto ////////////////////
#define __Pyx_PyNumber_Power2(a, b) PyNumber_Power(a, b, Py_None)
//////////////////// object_ord.proto ////////////////////
//@requires: TypeConversion.c::UnicodeAsUCS4
/*
 * ord() entry point for an arbitrary Python object, yielding a C long.
 * On Python 3 a unicode argument is converted inline through
 * __Pyx_PyUnicode_AsPy_UCS4(); every other argument is handed to
 * __Pyx__PyObject_Ord(). On Python 2 every call goes to
 * __Pyx__PyObject_Ord().
 * NOTE: the Python 3 form expands its argument more than once.
 */
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyObject_Ord(c) \
(likely(PyUnicode_Check(c)) ? (long)__Pyx_PyUnicode_AsPy_UCS4(c) : __Pyx__PyObject_Ord(c))
#else
#define __Pyx_PyObject_Ord(c) __Pyx__PyObject_Ord(c)
#endif
/* Out-of-line helper behind the macro above. */
static long __Pyx__PyObject_Ord(PyObject* c); /*proto*/
//////////////////// object_ord ////////////////////
/*
 * Out-of-line part of ord() for a generic Python object.
 *
 * Accepts a bytes object or a bytearray of length 1 and returns its single
 * byte as an unsigned value. On Python 2, unicode objects are also accepted
 * and converted through __Pyx_PyUnicode_AsPy_UCS4().
 *
 * On failure a TypeError is set and (long)(Py_UCS4)-1 is returned: either
 * the object has an unsupported type, or it is a bytes/bytearray whose
 * length is not 1.
 */
static long __Pyx__PyObject_Ord(PyObject* c) {
    Py_ssize_t size;
    if (PyBytes_Check(c)) {
        size = PyBytes_GET_SIZE(c);
        if (likely(size == 1)) {
            return (unsigned char) PyBytes_AS_STRING(c)[0];
        }
#if PY_MAJOR_VERSION < 3
    } else if (PyUnicode_Check(c)) {
        return (long)__Pyx_PyUnicode_AsPy_UCS4(c);
#endif
    } else if (PyByteArray_Check(c)) {
        size = PyByteArray_GET_SIZE(c);
        if (likely(size == 1)) {
            return (unsigned char) PyByteArray_AS_STRING(c)[0];
        }
    } else {
        // FIXME: support character buffers - but CPython doesn't support them either
        // Py_TYPE() instead of touching the ob_type field directly.
        PyErr_Format(PyExc_TypeError,
            "ord() expected string of length 1, but %.200s found", Py_TYPE(c)->tp_name);
        return (long)(Py_UCS4)-1;
    }
    // Only reached for bytes/bytearray objects whose length is not 1.
    // CYTHON_FORMAT_SSIZE_T keeps the Py_ssize_t format in line with the
    // other length messages in the utility code.
    PyErr_Format(PyExc_TypeError,
        "ord() expected a character, but string of length %" CYTHON_FORMAT_SSIZE_T "d found", size);
    return (long)(Py_UCS4)-1;
}
//////////////////// py_dict_keys.proto ////////////////////
static CYTHON_INLINE PyObject* __Pyx_PyDict_Keys(PyObject* d); /*proto*/
......
......@@ -2,6 +2,9 @@
/* Type Conversion Predeclarations */
/* Plain C casts used as the C-level implementation of ord() for C character
 * types. The argument is parenthesised so that an expression argument is
 * cast as a whole rather than only its first operand. */
#define __Pyx_uchar_cast(c) ((unsigned char)(c))
#define __Pyx_long_cast(x) ((long)(x))
#define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \
(sizeof(type) < sizeof(Py_ssize_t)) || \
(sizeof(type) > sizeof(Py_ssize_t) && \
......@@ -391,43 +394,54 @@ bad:
}
/////////////// ObjectAsUCS4.proto ///////////////
/////////////// UnicodeAsUCS4.proto ///////////////
static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject*);
static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject*);
/////////////// ObjectAsUCS4 ///////////////
/////////////// UnicodeAsUCS4 ///////////////
static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) {
long ival;
if (PyUnicode_Check(x)) {
Py_ssize_t length;
#if CYTHON_PEP393_ENABLED
length = PyUnicode_GET_LENGTH(x);
if (likely(length == 1)) {
return PyUnicode_READ_CHAR(x, 0);
}
#else
length = PyUnicode_GET_SIZE(x);
if (likely(length == 1)) {
return PyUnicode_AS_UNICODE(x)[0];
}
#if Py_UNICODE_SIZE == 2
else if (PyUnicode_GET_SIZE(x) == 2) {
Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0];
if (high_val >= 0xD800 && high_val <= 0xDBFF) {
Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1];
if (low_val >= 0xDC00 && low_val <= 0xDFFF) {
return 0x10000 + (((high_val & ((1<<10)-1)) << 10) | (low_val & ((1<<10)-1)));
}
static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject* x) {
Py_ssize_t length;
#if CYTHON_PEP393_ENABLED
length = PyUnicode_GET_LENGTH(x);
if (likely(length == 1)) {
return PyUnicode_READ_CHAR(x, 0);
}
#else
length = PyUnicode_GET_SIZE(x);
if (likely(length == 1)) {
return PyUnicode_AS_UNICODE(x)[0];
}
#if Py_UNICODE_SIZE == 2
else if (PyUnicode_GET_SIZE(x) == 2) {
Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0];
if (high_val >= 0xD800 && high_val <= 0xDBFF) {
Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1];
if (low_val >= 0xDC00 && low_val <= 0xDFFF) {
return 0x10000 + (((high_val & ((1<<10)-1)) << 10) | (low_val & ((1<<10)-1)));
}
}
#endif
#endif
PyErr_Format(PyExc_ValueError,
"only single character unicode strings can be converted to Py_UCS4, "
"got length %" CYTHON_FORMAT_SSIZE_T "d", length);
return (Py_UCS4)-1;
}
#endif
#endif
PyErr_Format(PyExc_ValueError,
"only single character unicode strings can be converted to Py_UCS4, "
"got length %" CYTHON_FORMAT_SSIZE_T "d", length);
return (Py_UCS4)-1;
}
/////////////// ObjectAsUCS4.proto ///////////////
//@requires: UnicodeAsUCS4
/*
 * Convert a Python object to Py_UCS4: unicode objects take the inline
 * __Pyx_PyUnicode_AsPy_UCS4() path, anything else is passed to
 * __Pyx__PyObject_AsPy_UCS4().
 * NOTE: the macro expands its argument more than once.
 */
#define __Pyx_PyObject_AsPy_UCS4(x) \
(likely(PyUnicode_Check(x)) ? __Pyx_PyUnicode_AsPy_UCS4(x) : __Pyx__PyObject_AsPy_UCS4(x))
/* Out-of-line helper for arguments that are not unicode objects. */
static Py_UCS4 __Pyx__PyObject_AsPy_UCS4(PyObject*);
/////////////// ObjectAsUCS4 ///////////////
static Py_UCS4 __Pyx__PyObject_AsPy_UCS4(PyObject* x) {
long ival;
ival = __Pyx_PyInt_As_long(x);
if (unlikely(ival < 0)) {
if (!PyErr_Occurred())
......@@ -442,6 +456,7 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) {
return (Py_UCS4)ival;
}
/////////////// ObjectAsPyUnicode.proto ///////////////
static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject*);
......
cimport cython
import sys
uspace = u' '
ustring_with_a = u'abcdefg'
ustring_without_a = u'bcdefg'
@cython.test_fail_if_path_exists('//SimpleCallNode')
@cython.test_assert_path_exists(
# ord() should receive and return a C value
'//ReturnStatNode//CoerceToPyTypeNode//SimpleCallNode')
@cython.test_fail_if_path_exists(
'//ReturnStatNode//SimpleCallNode//CoerceToPyTypeNode')
def ord_Py_UNICODE(unicode s):
"""
>>> ord_Py_UNICODE(uspace)
......@@ -15,17 +22,23 @@ def ord_Py_UNICODE(unicode s):
u = s[0]
return ord(u)
@cython.test_assert_path_exists('//IntNode')
@cython.test_assert_path_exists('//TupleNode//IntNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
def ord_const():
"""
>>> ord_const()
>>> ord(b' ')
32
>>> ord(' ')
32
>>> ord_const()
(32, 32, 32, 255, 255, 4660, 0)
"""
return ord(u' ')
return ord(u' '), ord(b' '), ord(' '), ord('\xff'), ord(b'\xff'), ord(u'\u1234'), ord('\0')
@cython.test_assert_path_exists('//PrimaryCmpNode//IntNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
#@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_for_loop_ord(unicode s):
"""
>>> unicode_for_loop_ord(ustring_with_a)
......@@ -37,3 +50,43 @@ def unicode_for_loop_ord(unicode s):
if ord(c) == ord(u'a'):
return True
return False
# Compares ord() of an untyped Python argument against a C char holding b'x';
# the doctests pass both unicode and bytes arguments.
def compare_to_char(s):
"""
>>> compare_to_char(uspace)
False
>>> compare_to_char(b'a')
False
>>> compare_to_char(b'x')
True
>>> compare_to_char('x')
True
"""
cdef char c = b'x'
return ord(s) == c
# ord() on an untyped Python argument. For a multi-character string the
# doctest expects ValueError on Python 3 and TypeError on Python 2.
def ord_object(s):
"""
>>> try: ord_object('abc')
... except ValueError: assert sys.version_info[0] >= 3
... except TypeError: assert sys.version_info[0] < 3
>>> ord_object('a')
97
>>> ord_object(b'a')
97
"""
return ord(s)
# The name `ord` is rebound locally to a plain Python function; the doctest
# expects the call through `ord` to return that function's result (123)
# rather than the builtin ordinal.
def non_builtin_ord(s):
"""
>>> non_builtin_ord('x')
(123, 123)
"""
def _ord(s):
return 123
ord = _ord
return ord(s), _ord(s)
......@@ -5,6 +5,10 @@ cimport cython
cdef Py_UCS4 char_ASCII = u'A'
cdef Py_UCS4 char_KLINGON = u'\uF8D2'
u_A = char_ASCII
u_KLINGON = char_KLINGON
def compare_ASCII():
"""
>>> compare_ASCII()
......@@ -86,6 +90,19 @@ def unicode_ordinal(Py_UCS4 i):
"""
return i
# ord() applied to an argument typed as Py_UCS4.
# NOTE(review): `u0` used in the doctest is not defined in the visible hunk -
# presumably it is defined elsewhere in the test module; confirm.
def ord_py_ucs4(Py_UCS4 x):
"""
>>> ord_py_ucs4(u0)
0
>>> ord_py_ucs4(u_A)
65
>>> ord_py_ucs4(u_KLINGON)
63698
"""
return ord(x)
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_type_methods(Py_UCS4 uchar):
......
......@@ -5,6 +5,10 @@ cimport cython
cdef Py_UNICODE char_ASCII = u'A'
cdef Py_UNICODE char_KLINGON = u'\uF8D2'
u_A = char_ASCII
u_KLINGON = char_KLINGON
def compare_ASCII():
"""
>>> compare_ASCII()
......@@ -78,6 +82,19 @@ def unicode_ordinal(Py_UNICODE i):
"""
return i
# ord() applied to an argument typed as Py_UNICODE.
# NOTE(review): `u0` used in the doctest is not defined in the visible hunk -
# presumably it is defined elsewhere in the test module; confirm.
def ord_pyunicode(Py_UNICODE x):
"""
>>> ord_pyunicode(u0)
0
>>> ord_pyunicode(u_A)
65
>>> ord_pyunicode(u_KLINGON)
63698
"""
return ord(x)
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_type_methods(Py_UNICODE uchar):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment