Commit c0e70e4a authored by Robert Bradshaw's avatar Robert Bradshaw

Merge branch 'master' of github.com:cython/cython

parents 4e1e5440 d61f929f
...@@ -873,6 +873,7 @@ class GlobalState(object): ...@@ -873,6 +873,7 @@ class GlobalState(object):
self.const_cname_counter = 1 self.const_cname_counter = 1
self.string_const_index = {} self.string_const_index = {}
self.pyunicode_ptr_const_index = {}
self.int_const_index = {} self.int_const_index = {}
self.py_constants = [] self.py_constants = []
...@@ -1016,6 +1017,15 @@ class GlobalState(object): ...@@ -1016,6 +1017,15 @@ class GlobalState(object):
c.add_py_version(py_version) c.add_py_version(py_version)
return c return c
def get_pyunicode_ptr_const(self, text):
    """Return the C name of the Py_UNICODE[] constant for ``text``.

    The first request for a given ``text`` obtains a fresh name from
    ``new_const_cname()`` and records it in ``pyunicode_ptr_const_index``;
    later requests for an equal ``text`` return the recorded name.
    """
    assert text.is_unicode
    known = self.pyunicode_ptr_const_index
    if text in known:
        return known[text]
    cname = known[text] = self.new_const_cname()
    return cname
def get_py_string_const(self, text, identifier=None, def get_py_string_const(self, text, identifier=None,
is_str=False, unicode_value=None): is_str=False, unicode_value=None):
# return a Python string constant, creating a new one if necessary # return a Python string constant, creating a new one if necessary
...@@ -1141,6 +1151,17 @@ class GlobalState(object): ...@@ -1141,6 +1151,17 @@ class GlobalState(object):
for py_string in c.py_strings.values(): for py_string in c.py_strings.values():
py_strings.append((c.cname, len(py_string.cname), py_string)) py_strings.append((c.cname, len(py_string.cname), py_string))
for c, cname in self.pyunicode_ptr_const_index.items():
utf16_array, utf32_array = StringEncoding.encode_pyunicode_string(c)
if utf16_array:
# Narrow and wide representations differ
decls_writer.putln("#ifdef Py_UNICODE_WIDE")
decls_writer.putln("static Py_UNICODE %s[] = { %s };" % (cname, utf32_array))
if utf16_array:
decls_writer.putln("#else")
decls_writer.putln("static Py_UNICODE %s[] = { %s };" % (cname, utf16_array))
decls_writer.putln("#endif")
if py_strings: if py_strings:
self.use_utility_code(UtilityCode.load_cached("InitStrings", "StringTools.c")) self.use_utility_code(UtilityCode.load_cached("InitStrings", "StringTools.c"))
py_strings.sort() py_strings.sort()
...@@ -1435,6 +1456,9 @@ class CCodeWriter(object): ...@@ -1435,6 +1456,9 @@ class CCodeWriter(object):
def get_string_const(self, text): def get_string_const(self, text):
return self.globalstate.get_string_const(text).cname return self.globalstate.get_string_const(text).cname
def get_pyunicode_ptr_const(self, text):
    """Forward to the global state and return whatever it returns for ``text``."""
    state = self.globalstate
    return state.get_pyunicode_ptr_const(text)
def get_py_string_const(self, text, identifier=None, def get_py_string_const(self, text, identifier=None,
is_str=False, unicode_value=None): is_str=False, unicode_value=None):
return self.globalstate.get_py_string_const( return self.globalstate.get_py_string_const(
......
This diff is collapsed.
...@@ -7981,8 +7981,7 @@ class CnameDecoratorNode(StatNode): ...@@ -7981,8 +7981,7 @@ class CnameDecoratorNode(StatNode):
#------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------
if Options.gcc_branch_hints: if Options.gcc_branch_hints:
branch_prediction_macros = \ branch_prediction_macros = """
"""
#ifdef __GNUC__ #ifdef __GNUC__
/* Test for GCC > 2.95 */ /* Test for GCC > 2.95 */
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)) #if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
...@@ -7996,26 +7995,18 @@ if Options.gcc_branch_hints: ...@@ -7996,26 +7995,18 @@ if Options.gcc_branch_hints:
#define likely(x) (x) #define likely(x) (x)
#define unlikely(x) (x) #define unlikely(x) (x)
#endif /* __GNUC__ */ #endif /* __GNUC__ */
""" """
else: else:
branch_prediction_macros = \ branch_prediction_macros = """
"""
#define likely(x) (x) #define likely(x) (x)
#define unlikely(x) (x) #define unlikely(x) (x)
""" """
#get_name_predeclaration = \
#"static PyObject *__Pyx_GetName(PyObject *dict, char *name); /*proto*/"
#get_name_interned_predeclaration = \
#"static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/"
#------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------
printing_utility_code = UtilityCode.load_cached("Print", "Printing.c") printing_utility_code = UtilityCode.load_cached("Print", "Printing.c")
printing_one_utility_code = UtilityCode.load_cached("PrintOne", "Printing.c") printing_one_utility_code = UtilityCode.load_cached("PrintOne", "Printing.c")
#------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------
# Exception raising code # Exception raising code
......
...@@ -1977,6 +1977,11 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform): ...@@ -1977,6 +1977,11 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_char_ptr_type, None) PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_char_ptr_type, None)
]) ])
Pyx_Py_UNICODE_strlen_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_size_t_type, [
PyrexTypes.CFuncTypeArg("unicode", PyrexTypes.c_py_unicode_ptr_type, None)
])
PyObject_Size_func_type = PyrexTypes.CFuncType( PyObject_Size_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_py_ssize_t_type, [ PyrexTypes.c_py_ssize_t_type, [
PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None) PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None)
...@@ -1996,7 +2001,8 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform): ...@@ -1996,7 +2001,8 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
_ext_types_with_pysize = set(["cpython.array.array"]) _ext_types_with_pysize = set(["cpython.array.array"])
def _handle_simple_function_len(self, node, pos_args): def _handle_simple_function_len(self, node, pos_args):
"""Replace len(char*) by the equivalent call to strlen() and """Replace len(char*) by the equivalent call to strlen(),
len(Py_UNICODE*) by the equivalent Py_UNICODE_strlen() and
len(known_builtin_type) by an equivalent C-API call. len(known_builtin_type) by an equivalent C-API call.
""" """
if len(pos_args) != 1: if len(pos_args) != 1:
...@@ -2011,6 +2017,11 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform): ...@@ -2011,6 +2017,11 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
args = [arg], args = [arg],
is_temp = node.is_temp, is_temp = node.is_temp,
utility_code = UtilityCode.load_cached("IncludeStringH", "StringTools.c")) utility_code = UtilityCode.load_cached("IncludeStringH", "StringTools.c"))
elif arg.type.is_pyunicode_ptr:
new_node = ExprNodes.PythonCapiCallNode(
node.pos, "__Pyx_Py_UNICODE_strlen", self.Pyx_Py_UNICODE_strlen_func_type,
args = [arg],
is_temp = node.is_temp)
elif arg.type.is_pyobject: elif arg.type.is_pyobject:
cfunc_name = self._map_to_capi_len_function(arg.type) cfunc_name = self._map_to_capi_len_function(arg.type)
if cfunc_name is None: if cfunc_name is None:
......
...@@ -145,6 +145,7 @@ class PyrexType(BaseType): ...@@ -145,6 +145,7 @@ class PyrexType(BaseType):
# is_enum boolean Is a C enum type # is_enum boolean Is a C enum type
# is_typedef boolean Is a typedef type # is_typedef boolean Is a typedef type
# is_string boolean Is a C char * type # is_string boolean Is a C char * type
# is_pyunicode_ptr boolean Is a C Py_UNICODE * type
# is_cpp_string boolean Is a C++ std::string type # is_cpp_string boolean Is a C++ std::string type
# is_unicode_char boolean Is either Py_UCS4 or Py_UNICODE # is_unicode_char boolean Is either Py_UCS4 or Py_UNICODE
# is_returncode boolean Is used only to signal exceptions # is_returncode boolean Is used only to signal exceptions
...@@ -202,6 +203,7 @@ class PyrexType(BaseType): ...@@ -202,6 +203,7 @@ class PyrexType(BaseType):
is_enum = 0 is_enum = 0
is_typedef = 0 is_typedef = 0
is_string = 0 is_string = 0
is_pyunicode_ptr = 0
is_unicode_char = 0 is_unicode_char = 0
is_returncode = 0 is_returncode = 0
is_error = 0 is_error = 0
...@@ -873,7 +875,7 @@ class PyObjectType(PyrexType): ...@@ -873,7 +875,7 @@ class PyObjectType(PyrexType):
def assignable_from(self, src_type): def assignable_from(self, src_type):
# except for pointers, conversion will be attempted # except for pointers, conversion will be attempted
return not src_type.is_ptr or src_type.is_string return not src_type.is_ptr or src_type.is_string or src_type.is_pyunicode_ptr
def declaration_code(self, entity_code, def declaration_code(self, entity_code,
for_display = 0, dll_linkage = None, pyrex = 0): for_display = 0, dll_linkage = None, pyrex = 0):
...@@ -1163,7 +1165,7 @@ class CType(PyrexType): ...@@ -1163,7 +1165,7 @@ class CType(PyrexType):
def error_condition(self, result_code): def error_condition(self, result_code):
conds = [] conds = []
if self.is_string: if self.is_string or self.is_pyunicode_ptr:
conds.append("(!%s)" % result_code) conds.append("(!%s)" % result_code)
elif self.exception_value is not None: elif self.exception_value is not None:
conds.append("(%s == (%s)%s)" % (result_code, self.sign_and_name(), self.exception_value)) conds.append("(%s == (%s)%s)" % (result_code, self.sign_and_name(), self.exception_value))
...@@ -2180,6 +2182,9 @@ class CPointerBaseType(CType): ...@@ -2180,6 +2182,9 @@ class CPointerBaseType(CType):
if base_type.same_as(char_type): if base_type.same_as(char_type):
self.is_string = 1 self.is_string = 1
break break
else:
if base_type.same_as(c_py_unicode_type):
self.is_pyunicode_ptr = 1
if self.is_string and not base_type.is_error: if self.is_string and not base_type.is_error:
if base_type.signed: if base_type.signed:
...@@ -2191,10 +2196,17 @@ class CPointerBaseType(CType): ...@@ -2191,10 +2196,17 @@ class CPointerBaseType(CType):
if self.is_ptr: if self.is_ptr:
self.from_py_function = "__Pyx_PyObject_AsUString" self.from_py_function = "__Pyx_PyObject_AsUString"
self.exception_value = "NULL" self.exception_value = "NULL"
elif self.is_pyunicode_ptr and not base_type.is_error:
self.to_py_function = "__Pyx_PyUnicode_FromUnicode"
if self.is_ptr:
self.from_py_function = "__Pyx_PyUnicode_AsUnicode"
self.exception_value = "NULL"
def py_type_name(self): def py_type_name(self):
if self.is_string: if self.is_string:
return "bytes" return "bytes"
elif self.is_pyunicode_ptr:
return "unicode"
else: else:
return super(CPointerBaseType, self).py_type_name() return super(CPointerBaseType, self).py_type_name()
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
import re import re
import sys import sys
import array
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
_unicode, _str, _bytes = str, str, bytes _unicode, _str, _bytes = str, str, bytes
...@@ -262,3 +263,22 @@ def split_string_literal(s, limit=2000): ...@@ -262,3 +263,22 @@ def split_string_literal(s, limit=2000):
chunks.append(s[start:end]) chunks.append(s[start:end])
start = end start = end
return '""'.join(chunks) return '""'.join(chunks)
def encode_pyunicode_string(s):
    """Create Py_UNICODE[] representations of a given unicode string.

    Returns a ``(utf16, utf32)`` tuple of strings.  Each one is a
    comma-separated list of decimal code units ending with a terminating 0,
    ready to be pasted into a C array initializer.  ``utf16`` is the empty
    string when ``s`` contains no character above U+FFFF, i.e. when the
    UTF-16 and UTF-32 code unit sequences are the same and only ``utf32``
    is needed.
    """
    def format_units(code_units):
        # Format through a unicode template rather than the ``unicode``
        # builtin: that name only exists on Python 2 and raises NameError
        # on Python 3.
        return u",".join([u"%d" % unit for unit in code_units])

    # The 'UTF-32' / 'UTF-16' codecs emit native byte order with a leading
    # BOM, so the arrays can read the bytes back as native integers.
    utf32_array = array.array('i', s.encode('UTF-32'))
    assert utf32_array.itemsize == 4
    utf32_array.pop(0)     # Remove BOM
    utf32_array.append(0)  # Add NULL terminator

    for c in utf32_array:
        if c > 65535:
            # At least one character needs a surrogate pair, so the UTF-16
            # form differs from the UTF-32 one and must be emitted as well.
            utf16_array = array.array('H', s.encode('UTF-16'))
            utf16_array.pop(0)     # Remove BOM
            utf16_array.append(0)  # Add NULL terminator
            break
    else:
        utf16_array = []

    return format_units(utf16_array), format_units(utf32_array)
...@@ -182,3 +182,5 @@ from cpython.pycapsule cimport * ...@@ -182,3 +182,5 @@ from cpython.pycapsule cimport *
################################################################# #################################################################
# END OF DEPRECATED SECTION # END OF DEPRECATED SECTION
################################################################# #################################################################
from cpython.datetime cimport *
from cpython.ref cimport PyObject
cdef extern from "Python.h":
    # Declared without fields: this file only needs the type's name.
    ctypedef struct PyTypeObject:
        pass
# Declarations made available from the C header "datetime.h".
cdef extern from "datetime.h":

    # Extension types, each bound to its C object struct.
    ctypedef extern class datetime.date[object PyDateTime_Date]:
        pass

    ctypedef extern class datetime.time[object PyDateTime_Time]:
        pass

    ctypedef extern class datetime.datetime[object PyDateTime_DateTime]:
        pass

    ctypedef extern class datetime.timedelta[object PyDateTime_Delta]:
        pass

    ctypedef extern class datetime.tzinfo[object PyDateTime_TZInfo]:
        pass

    # Object structs.  Only the fields listed here can be accessed from
    # Cython code; PyDateTime_Date exposes none.
    ctypedef struct PyDateTime_Date:
        pass

    ctypedef struct PyDateTime_Time:
        char hastzinfo
        PyObject *tzinfo

    ctypedef struct PyDateTime_DateTime:
        char hastzinfo
        PyObject *tzinfo

    ctypedef struct PyDateTime_Delta:
        int days
        int seconds
        int microseconds

    # Define structure for C API.
    ctypedef struct PyDateTime_CAPI:
        # type objects
        PyTypeObject *DateType
        PyTypeObject *DateTimeType
        PyTypeObject *TimeType
        PyTypeObject *DeltaType
        PyTypeObject *TZInfoType

        # constructors
        object (*Date_FromDate)(int, int, int, PyTypeObject*)
        object (*DateTime_FromDateAndTime)(int, int, int, int, int, int, int, object, PyTypeObject*)
        object (*Time_FromTime)(int, int, int, int, object, PyTypeObject*)
        object (*Delta_FromDelta)(int, int, int, int, PyTypeObject*)

        # constructors for the DB API
        object (*DateTime_FromTimestamp)(object, object, object)
        object (*Date_FromTimestamp)(object, object)

    # Check type of the object.
    bint PyDate_Check(object op)
    bint PyDate_CheckExact(object op)

    bint PyDateTime_Check(object op)
    bint PyDateTime_CheckExact(object op)

    bint PyTime_Check(object op)
    bint PyTime_CheckExact(object op)

    bint PyDelta_Check(object op)
    bint PyDelta_CheckExact(object op)

    bint PyTZInfo_Check(object op)
    bint PyTZInfo_CheckExact(object op)

    # Getters for date and datetime (C macros).
    int PyDateTime_GET_YEAR(object o)
    int PyDateTime_GET_MONTH(object o)
    int PyDateTime_GET_DAY(object o)

    # Getters for datetime (C macros).
    int PyDateTime_DATE_GET_HOUR(object o)
    int PyDateTime_DATE_GET_MINUTE(object o)
    int PyDateTime_DATE_GET_SECOND(object o)
    int PyDateTime_DATE_GET_MICROSECOND(object o)

    # Getters for time (C macros).
    int PyDateTime_TIME_GET_HOUR(object o)
    int PyDateTime_TIME_GET_MINUTE(object o)
    int PyDateTime_TIME_GET_SECOND(object o)
    int PyDateTime_TIME_GET_MICROSECOND(object o)

    # Getters for timedelta (C macros).
    # NOTE(review): these declarations are disabled; the timedelta_* helpers
    # in this file read the PyDateTime_Delta fields directly instead.
    #int PyDateTime_DELTA_GET_DAYS(object o)
    #int PyDateTime_DELTA_GET_SECONDS(object o)
    #int PyDateTime_DELTA_GET_MICROSECONDS(object o)

    # PyDateTime CAPI object.
    PyDateTime_CAPI *PyDateTimeAPI

    void PyDateTime_IMPORT()
# Datetime C API initialization function.
# You have to call it before any usage of DateTime CAPI functions.
cdef inline void import_datetime():
    # NOTE(review): PyDateTime_IMPORT is named without call parentheses even
    # though it is declared as ``void PyDateTime_IMPORT()``.  Presumably it is
    # an object-like C macro in datetime.h, so the bare name is what has to be
    # emitted -- confirm against datetime.h before "fixing" this into a call.
    PyDateTime_IMPORT
# Create date object using DateTime CAPI factory function.
# Note, there are no range checks for any of the arguments.
# The arguments are forwarded unchanged to PyDateTimeAPI.Date_FromDate
# together with PyDateTimeAPI.DateType as the type to create.
cdef inline object date_new(int year, int month, int day):
    return PyDateTimeAPI.Date_FromDate(year, month, day, PyDateTimeAPI.DateType)
# Create time object using DateTime CAPI factory function.
# Note, there are no range checks for any of the arguments.
# ``tz`` is passed through as the tzinfo argument of
# PyDateTimeAPI.Time_FromTime; PyDateTimeAPI.TimeType is the type to create.
cdef inline object time_new(int hour, int minute, int second, int microsecond, object tz):
    return PyDateTimeAPI.Time_FromTime(hour, minute, second, microsecond, tz, PyDateTimeAPI.TimeType)
# Create datetime object using DateTime CAPI factory function.
# Note, there are no range checks for any of the arguments.
# ``tz`` is passed through as the tzinfo argument of
# PyDateTimeAPI.DateTime_FromDateAndTime; PyDateTimeAPI.DateTimeType is the
# type to create.
cdef inline object datetime_new(int year, int month, int day, int hour, int minute, int second, int microsecond, object tz):
    return PyDateTimeAPI.DateTime_FromDateAndTime(year, month, day, hour, minute, second, microsecond, tz, PyDateTimeAPI.DateTimeType)
# Create timedelta object using DateTime CAPI factory function.
# Note, there are no range checks for any of the arguments.
# NOTE(review): the literal 1 passed as the fourth argument of
# Delta_FromDelta is presumably its "normalize" flag -- confirm in datetime.h.
cdef inline object timedelta_new(int days, int seconds, int useconds):
    return PyDateTimeAPI.Delta_FromDelta(days, seconds, useconds, 1, PyDateTimeAPI.DeltaType)
# More recognizable getters for date/time/datetime/timedelta.
# There are no setters because datetime.h does not provide any.
# This is by design: these objects are immutable.
# To change a time/date/datetime/timedelta object, create a new one instead.
# Return the tzinfo stored in a time object, or None when its
# ``hastzinfo`` flag is not set.  ``o`` is cast without a type check.
cdef inline object time_tzinfo(object o):
    if not (<PyDateTime_Time*>o).hastzinfo:
        return None
    return <object>(<PyDateTime_Time*>o).tzinfo
# Return the tzinfo stored in a datetime object, or None when its
# ``hastzinfo`` flag is not set.  ``o`` is cast without a type check.
cdef inline object datetime_tzinfo(object o):
    if not (<PyDateTime_DateTime*>o).hastzinfo:
        return None
    return <object>(<PyDateTime_DateTime*>o).tzinfo
# Get year of date.
# Thin wrapper around the PyDateTime_GET_YEAR C macro; ``o`` is passed
# through without a type check.
cdef inline int date_year(object o):
    return PyDateTime_GET_YEAR(o)
# Get month of date.
# Thin wrapper around the PyDateTime_GET_MONTH C macro; ``o`` is passed
# through without a type check.
cdef inline int date_month(object o):
    return PyDateTime_GET_MONTH(o)
# Get day of date.
# Thin wrapper around the PyDateTime_GET_DAY C macro; ``o`` is passed
# through without a type check.
cdef inline int date_day(object o):
    return PyDateTime_GET_DAY(o)
# Get year of datetime.
# Uses the same PyDateTime_GET_YEAR C macro as date_year(); ``o`` is passed
# through without a type check.
cdef inline int datetime_year(object o):
    return PyDateTime_GET_YEAR(o)
# Get month of datetime.
# Uses the same PyDateTime_GET_MONTH C macro as date_month(); ``o`` is
# passed through without a type check.
cdef inline int datetime_month(object o):
    return PyDateTime_GET_MONTH(o)
# Get day of datetime.
# Uses the same PyDateTime_GET_DAY C macro as date_day(); ``o`` is passed
# through without a type check.
cdef inline int datetime_day(object o):
    return PyDateTime_GET_DAY(o)
# Get hour of time.
# Thin wrapper around the PyDateTime_TIME_GET_HOUR C macro; ``o`` is passed
# through without a type check.
cdef inline int time_hour(object o):
    return PyDateTime_TIME_GET_HOUR(o)
# Get minute of time.
# Thin wrapper around the PyDateTime_TIME_GET_MINUTE C macro; ``o`` is
# passed through without a type check.
cdef inline int time_minute(object o):
    return PyDateTime_TIME_GET_MINUTE(o)
# Get second of time.
# Thin wrapper around the PyDateTime_TIME_GET_SECOND C macro; ``o`` is
# passed through without a type check.
cdef inline int time_second(object o):
    return PyDateTime_TIME_GET_SECOND(o)
# Get microsecond of time.
# Thin wrapper around the PyDateTime_TIME_GET_MICROSECOND C macro; ``o`` is
# passed through without a type check.
cdef inline int time_microsecond(object o):
    return PyDateTime_TIME_GET_MICROSECOND(o)
# Get hour of datetime.
# Thin wrapper around the PyDateTime_DATE_GET_HOUR C macro; ``o`` is passed
# through without a type check.
cdef inline int datetime_hour(object o):
    return PyDateTime_DATE_GET_HOUR(o)
# Get minute of datetime.
# Thin wrapper around the PyDateTime_DATE_GET_MINUTE C macro; ``o`` is
# passed through without a type check.
cdef inline int datetime_minute(object o):
    return PyDateTime_DATE_GET_MINUTE(o)
# Get second of datetime.
# Thin wrapper around the PyDateTime_DATE_GET_SECOND C macro; ``o`` is
# passed through without a type check.
cdef inline int datetime_second(object o):
    return PyDateTime_DATE_GET_SECOND(o)
# Get microsecond of datetime.
# Thin wrapper around the PyDateTime_DATE_GET_MICROSECOND C macro; ``o`` is
# passed through without a type check.
cdef inline int datetime_microsecond(object o):
    return PyDateTime_DATE_GET_MICROSECOND(o)
# Get days of timedelta.
# Reads the ``days`` field straight from the PyDateTime_Delta struct; ``o``
# is cast without a type check.
cdef inline int timedelta_days(object o):
    return (<PyDateTime_Delta*>o).days
# Get seconds of timedelta.
# Reads the ``seconds`` field straight from the PyDateTime_Delta struct;
# ``o`` is cast without a type check.
cdef inline int timedelta_seconds(object o):
    return (<PyDateTime_Delta*>o).seconds
# Get microseconds of timedelta.
# Reads the ``microseconds`` field straight from the PyDateTime_Delta
# struct; ``o`` is cast without a type check.
cdef inline int timedelta_microseconds(object o):
    return (<PyDateTime_Delta*>o).microseconds
...@@ -595,7 +595,12 @@ static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name) { ...@@ -595,7 +595,12 @@ static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name) {
result = __Pyx_PyObject_GetAttrStr($builtins_cname, name); result = __Pyx_PyObject_GetAttrStr($builtins_cname, name);
} }
if (!result) { if (!result) {
PyErr_SetObject(PyExc_NameError, name); PyErr_Format(PyExc_NameError,
#if PY_MAJOR_VERSION >= 3
"global name '%U' is not defined", name);
#else
"global name '%s' is not defined", PyString_AS_STRING(name));
#endif
} }
} }
return result; return result;
......
...@@ -24,6 +24,21 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*); ...@@ -24,6 +24,21 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
#define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s) #define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s)
#define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s) #define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s)
#if PY_MAJOR_VERSION < 3
/* Count the Py_UNICODE code units that precede the terminating 0 in `u`.
 * `u` must point to a zero-terminated buffer. */
static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u)
{
    size_t length = 0;
    while (u[length]) {
        length++;
    }
    return length;
}
#else
#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen
#endif
#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None) #define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False)) #define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
......
...@@ -546,3 +546,56 @@ code will run in plain C code, (actually using a switch statement):: ...@@ -546,3 +546,56 @@ code will run in plain C code, (actually using a switch statement)::
Combined with the looping optimisation above, this can result in very Combined with the looping optimisation above, this can result in very
efficient character switching code, e.g. in unicode parsers. efficient character switching code, e.g. in unicode parsers.
Windows and wide character APIs
-------------------------------
Windows system APIs natively support Unicode in the form of
zero-terminated UTF-16 encoded :c:type:`wchar_t*` strings, so-called
"wide strings".
By default, Windows builds of CPython define :c:type:`Py_UNICODE` as
a synonym for :c:type:`wchar_t`. This makes the internal ``unicode``
representation compatible with UTF-16 and allows for efficient zero-copy
conversions. This also means that Windows builds are always
`Narrow Unicode builds`_ with all the caveats.
To aid interoperation with Windows APIs, Cython 0.19 supports wide
strings (in the form of :c:type:`Py_UNICODE*`) and implicitly converts
them to and from ``unicode`` string objects. These conversions behave the
same way as they do for :c:type:`char*` and ``bytes`` as described in
`Passing byte strings`_.
In addition to automatic conversion, unicode literals that appear
in a C context become C-level wide string literals, and the :py:func:`len`
built-in function is specialized to compute the length of a zero-terminated
:c:type:`Py_UNICODE*` string or array.
Here is an example of how one would call a Unicode API on Windows::

    cdef extern from "Windows.h":

        ctypedef Py_UNICODE WCHAR
        ctypedef const WCHAR* LPCWSTR
        ctypedef void* HWND

        int MessageBoxW(HWND hWnd, LPCWSTR lpText, LPCWSTR lpCaption, int uType)

    title = u"Windows Interop Demo - Python %d.%d.%d" % sys.version_info[:3]
    MessageBoxW(NULL, u"Hello Cython \u263a", title, 0)

.. Warning::

    The use of :c:type:`Py_UNICODE*` strings outside of Windows is
    strongly discouraged. :c:type:`Py_UNICODE` is inherently not
    portable between different platforms and Python versions.

    CPython 3.3 has moved to a flexible internal representation of
    unicode strings (:pep:`393`), making all :c:type:`Py_UNICODE` related
    APIs deprecated and inefficient.

One consequence of the CPython 3.3 changes is that :py:func:`len` of
``unicode`` strings is always measured in *code points* ("characters"),
while Windows APIs expect the number of UTF-16 *code units*
(where each surrogate is counted individually). To always get the number
of code units, call :c:func:`PyUnicode_GetSize` directly.
...@@ -124,6 +124,17 @@ def pyx_to_dll(filename, ext = None, force_rebuild = 0, ...@@ -124,6 +124,17 @@ def pyx_to_dll(filename, ext = None, force_rebuild = 0,
basename + '.reload%s'%count) basename + '.reload%s'%count)
try: try:
import shutil # late import / reload_support is: debugging import shutil # late import / reload_support is: debugging
try:
# Try to unlink first --- if the .so file
# is mmapped by another process,
# overwriting its contents corrupts the
# loaded image (on Linux) and crashes the
# other process. On Windows, unlinking an
# open file just fails.
if os.path.isfile(r_path):
os.unlink(r_path)
except OSError:
continue
shutil.copy2(org_path, r_path) shutil.copy2(org_path, r_path)
so_path = r_path so_path = r_path
except IOError: except IOError:
......
# mode: error # mode: error
# tag: werror, charptr, conversion, temp # tag: werror, charptr, conversion, temp, py_unicode_strings
cdef bytes c_s = b"abc" cdef bytes c_s = b"abc"
s = b"abc" s = b"abc"
...@@ -18,7 +18,28 @@ cptr = s ...@@ -18,7 +18,28 @@ cptr = s
# temp => error # temp => error
cptr = s + b"cba" cptr = s + b"cba"
cdef unicode c_u = u"abc"
u = u"abc"
cdef Py_UNICODE* cuptr
# constant => ok
cuptr = u"xyz"
# global cdef variable => ok
cuptr = c_u
# pyglobal => warning
cuptr = u
# temp => error
cuptr = u + u"cba"
_ERRORS = """ _ERRORS = """
16:8: Obtaining char* from externally modifiable global Python value 16:8: Obtaining 'char *' from externally modifiable global Python value
19:9: Obtaining char* from temporary Python value 19:9: Obtaining 'char *' from temporary Python value
34:9: Obtaining 'Py_UNICODE *' from externally modifiable global Python value
37:10: Obtaining 'Py_UNICODE *' from temporary Python value
""" """
...@@ -15,5 +15,5 @@ _ERRORS = """ ...@@ -15,5 +15,5 @@ _ERRORS = """
4:14: Only single-character string literals can be coerced into ints. 4:14: Only single-character string literals can be coerced into ints.
5:14: Only single-character string literals can be coerced into ints. 5:14: Only single-character string literals can be coerced into ints.
8:15: Only single-character string literals can be coerced into ints. 8:15: Only single-character string literals can be coerced into ints.
11:14: Unicode literals do not support coercion to C types other than Py_UNICODE or Py_UCS4. 11:14: Unicode literals do not support coercion to C types other than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* (for strings).
""" """
# mode: error # mode: error
# coding: ASCII # coding: ASCII
# tag: py_unicode_strings
# ok: # ok:
cdef char* c1 = "abc" cdef char* c1 = "abc"
cdef str s1 = "abc" cdef str s1 = "abc"
cdef unicode u1 = u"abc" cdef unicode u1 = u"abc"
cdef Py_UNICODE* cu1 = u1
cdef bytes b1 = b"abc" cdef bytes b1 = b"abc"
cdef char* c2 = b"abc" cdef char* c2 = b"abc"
...@@ -21,12 +23,18 @@ o4 = c1 ...@@ -21,12 +23,18 @@ o4 = c1
o5 = b1 o5 = b1
o6 = s1 o6 = s1
o7 = u1 o7 = u1
o8 = cu1
# errors: # errors:
cdef char* c_f1 = u"abc" cdef char* c_f1 = u"abc"
cdef char* c_f2 = u1 cdef char* c_f2 = u1
cdef char* c_f3 = s1 cdef char* c_f3 = s1
cdef Py_UNICODE* cu_f1 = c1
cdef Py_UNICODE* cu_f2 = b1
cdef Py_UNICODE* cu_f3 = s1
cdef Py_UNICODE* cu_f4 = b"abc"
cdef bytes b_f1 = u"abc" cdef bytes b_f1 = u"abc"
cdef bytes b_f2 = u1 cdef bytes b_f2 = u1
cdef bytes b_f3 = s1 cdef bytes b_f3 = s1
...@@ -56,31 +64,36 @@ print <unicode>c1 ...@@ -56,31 +64,36 @@ print <unicode>c1
print <unicode>c1[1:2] print <unicode>c1[1:2]
_ERRORS = u""" _ERRORS = u"""
26:20: Unicode literals do not support coercion to C types other than Py_UNICODE or Py_UCS4. 29:20: Unicode literals do not support coercion to C types other than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* (for strings).
27:22: Unicode objects do not support coercion to C types. 30:22: Unicode objects only support coercion to Py_UNICODE*.
28:22: 'str' objects do not support coercion to C types (use 'bytes'?). 31:22: 'str' objects do not support coercion to C types (use 'bytes'?).
30:20: Cannot convert Unicode string to 'bytes' implicitly, encoding required. 33:27: Cannot assign type 'char *' to 'Py_UNICODE *'
31:22: Cannot convert Unicode string to 'bytes' implicitly, encoding required. 34:27: Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.
32:22: Cannot convert 'str' to 'bytes' implicitly. This is not portable. 35:27: 'str' objects do not support coercion to C types (use 'unicode'?).
36:25: Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.
34:17: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
35:19: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3. 38:20: Cannot convert Unicode string to 'bytes' implicitly, encoding required.
36:17: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding. 39:22: Cannot convert Unicode string to 'bytes' implicitly, encoding required.
37:19: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding. 40:22: Cannot convert 'str' to 'bytes' implicitly. This is not portable.
39:20: str objects do not support coercion to unicode, use a unicode string literal instead (u'') 42:17: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
40:22: str objects do not support coercion to unicode, use a unicode string literal instead (u'') 43:19: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
41:20: Cannot convert 'bytes' object to unicode implicitly, decoding required 44:17: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
42:22: Cannot convert 'bytes' object to unicode implicitly, decoding required 45:19: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
43:22: Cannot convert 'char*' to unicode implicitly, decoding required
47:20: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
45:19: Cannot assign type 'str object' to 'tuple object' 48:22: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
46:18: Cannot assign type 'unicode object' to 'tuple object' 49:20: Cannot convert 'bytes' object to unicode implicitly, decoding required
47:18: Cannot assign type 'bytes object' to 'tuple object' 50:22: Cannot convert 'bytes' object to unicode implicitly, decoding required
51:22: Cannot convert 'char*' to unicode implicitly, decoding required
53:13: default encoding required for conversion from 'char *' to 'str object'
54:13: default encoding required for conversion from 'char *' to 'str object' 53:19: Cannot assign type 'str object' to 'tuple object'
55:17: Cannot convert 'char*' to unicode implicitly, decoding required 54:18: Cannot assign type 'unicode object' to 'tuple object'
56:17: default encoding required for conversion from 'char *' to 'unicode object' 55:18: Cannot assign type 'bytes object' to 'tuple object'
61:13: default encoding required for conversion from 'char *' to 'str object'
62:13: default encoding required for conversion from 'char *' to 'str object'
63:17: Cannot convert 'char*' to unicode implicitly, decoding required
64:17: default encoding required for conversion from 'char *' to 'unicode object'
""" """
...@@ -131,3 +131,15 @@ def str_slicing2(): ...@@ -131,3 +131,15 @@ def str_slicing2():
str3 = 'abc\xE9def'[2:4] str3 = 'abc\xE9def'[2:4]
return str0, str1, str2, str3 return str0, str1, str2, str3
# Going by the decorator's name, this test fails if an IfStatNode is still
# present in the tree, so the explicit if/else below is the construct under
# test and must keep its shape.
@cython.test_fail_if_path_exists(
    "//IfStatNode",
)
def str_in_and_not_in():
    """
    >>> str_in_and_not_in()
    True
    """
    # Every operand is a string literal; the condition is written out in full
    # on purpose.
    if 'a' in 'abc' and 'b' in 'abc' and 'c' in 'abc' and 'd' not in 'abc': return True
    else: return False
# coding: utf-8
from cpython.datetime cimport import_datetime
from cpython.datetime cimport date, time, datetime, timedelta, PyDateTime_IMPORT
import_datetime()
# Build a date through the cimported ``date`` type and hand it back.
def test_date(int year, int month, int day):
    '''
    >>> val = test_date(2012, 12, 31)
    >>> print(val)
    2012-12-31
    '''
    return date(year, month, day)
# Build a time through the cimported ``time`` type and hand it back.
def test_time(int hour, int minute, int second, int microsecond):
    '''
    >>> val = test_time(12, 20, 55, 0)
    >>> print(val)
    12:20:55
    '''
    return time(hour, minute, second, microsecond)
# Build a datetime through the cimported ``datetime`` type and hand it back.
def test_datetime(int year, int month, int day, int hour, int minute, int second, int microsecond):
    '''
    >>> val = test_datetime(2012, 12, 31, 12, 20, 55, 0)
    >>> print(val)
    2012-12-31 12:20:55
    '''
    return datetime(year, month, day, hour, minute, second, microsecond)
# Build a timedelta through the cimported ``timedelta`` type and hand it back.
def test_timedelta(int days, int seconds, int useconds):
    '''
    >>> val = test_timedelta(30, 0, 0)
    >>> print(val)
    30 days, 0:00:00
    '''
    return timedelta(days, seconds, useconds)
from cpython.datetime cimport import_datetime
from cpython.datetime cimport time_new, date_new, datetime_new, timedelta_new
from cpython.datetime cimport time_tzinfo, datetime_tzinfo
from cpython.datetime cimport time_hour, time_minute, time_second, time_microsecond
from cpython.datetime cimport date_day, date_month, date_year
from cpython.datetime cimport datetime_day, datetime_month, datetime_year
from cpython.datetime cimport datetime_hour, datetime_minute, datetime_second, \
datetime_microsecond
from cpython.datetime cimport timedelta_days, timedelta_seconds, timedelta_microseconds
# NOTE(review): presumably initialises the datetime C-API behind the
# constructor and accessor helpers cimported above, so it has to run before
# any of them is used -- confirm against cpython/datetime.pxd.
import_datetime()
def test_date(int year, int month, int day):
    '''
    Build a date with ``date_new`` and check that each ``date_*`` accessor
    agrees with the matching Python attribute (year, month, day in order).

    >>> test_date(2012,12,31)
    (True, True, True)
    '''
    built = date_new(year, month, day)
    return (
        built.year == date_year(built),
        built.month == date_month(built),
        built.day == date_day(built),
    )
def test_datetime(int year, int month, int day,
                  int hour, int minute, int second, int microsecond):
    '''
    Build a datetime with ``datetime_new`` (``None`` as the last argument)
    and check that each ``datetime_*`` accessor agrees with the matching
    Python attribute, from year down to microsecond.

    >>> test_datetime(2012, 12, 31, 12, 30, 59, 12345)
    (True, True, True, True, True, True, True)
    '''
    built = datetime_new(year, month, day, hour, minute, second, microsecond, None)
    return (
        built.year == datetime_year(built),
        built.month == datetime_month(built),
        built.day == datetime_day(built),
        built.hour == datetime_hour(built),
        built.minute == datetime_minute(built),
        built.second == datetime_second(built),
        built.microsecond == datetime_microsecond(built),
    )
def test_time(int hour, int minute, int second, int microsecond):
    '''
    Build a time with ``time_new`` (``None`` as the last argument) and check
    that each ``time_*`` accessor agrees with the matching Python attribute.

    >>> test_time(12, 30, 59, 12345)
    (True, True, True, True)
    '''
    built = time_new(hour, minute, second, microsecond, None)
    return (
        built.hour == time_hour(built),
        built.minute == time_minute(built),
        built.second == time_second(built),
        built.microsecond == time_microsecond(built),
    )
def test_timedelta(int days, int seconds, int microseconds):
    '''
    Build a timedelta with ``timedelta_new`` and check that each
    ``timedelta_*`` accessor agrees with the matching Python attribute.

    >>> test_timedelta(30, 1440, 123456)
    (True, True, True)
    '''
    built = timedelta_new(days, seconds, microseconds)
    return (
        built.days == timedelta_days(built),
        built.seconds == timedelta_seconds(built),
        built.microseconds == timedelta_microseconds(built),
    )
# coding: utf-8
#cimport cpython.datetime as cy_datetime
#from datetime import time, date, datetime, timedelta, tzinfo
from cpython.datetime cimport import_datetime
from cpython.datetime cimport time_new, date_new, datetime_new, timedelta_new
from cpython.datetime cimport time_tzinfo, datetime_tzinfo
from cpython.datetime cimport time_hour, time_minute, time_second, time_microsecond
from cpython.datetime cimport date_day, date_month, date_year
from cpython.datetime cimport datetime_day, datetime_month, datetime_year
from cpython.datetime cimport datetime_hour, datetime_minute, datetime_second, \
datetime_microsecond
import datetime as py_datetime
# NOTE(review): presumably initialises the datetime C-API behind the helpers
# cimported above, so it has to run before any of them is used -- confirm
# against cpython/datetime.pxd.
import_datetime()

# Zero-length timedelta; this is the value FixedOffset.dst() returns.
ZERO = py_datetime.timedelta(0)
#
# Simple class from datetime docs
#
class FixedOffset(py_datetime.tzinfo):
    """Fixed offset in minutes east from UTC.

    ``offset`` is the number of minutes east of UTC and ``name`` is the value
    reported by ``tzname()``.  Both arguments are optional so that the class
    can be instantiated without arguments: the ``datetime`` documentation
    requires this of ``tzinfo`` subclasses, otherwise instances can be pickled
    but not unpickled again.  Existing two-argument callers are unaffected.
    """

    def __init__(self, offset=0, name=None):
        self._offset = py_datetime.timedelta(minutes=offset)
        self._name = name

    def utcoffset(self, dt):
        """Return the fixed UTC offset as a timedelta; ``dt`` is ignored."""
        return self._offset

    def tzname(self, dt):
        """Return the name given at construction; ``dt`` is ignored."""
        return self._name

    def dst(self, dt):
        """Return the module-level ``ZERO`` timedelta; ``dt`` is ignored."""
        return ZERO
def do_date(int year, int month, int day):
    """
    Create a date with ``date_new`` and report, in order: whether the result
    is exactly ``datetime.date``, then whether its year, month and day equal
    the arguments.

    >>> do_date(2012, 12, 31)
    (True, True, True, True)
    """
    made = date_new(year, month, day)
    return (
        type(made) is py_datetime.date,
        made.year == year,
        made.month == month,
        made.day == day,
    )
def do_datetime(int year, int month, int day,
                int hour, int minute, int second, int microsecond):
    """
    Create a datetime with ``datetime_new`` (``None`` as tzinfo) and report,
    in order: whether the result is exactly ``datetime.datetime``, whether
    each field equals its argument, and whether ``tzinfo`` is ``None``.

    >>> do_datetime(2012, 12, 31, 12, 23, 0, 0)
    (True, True, True, True, True, True, True, True, True)
    """
    made = datetime_new(year, month, day, hour, minute, second, microsecond, None)
    return (
        type(made) is py_datetime.datetime,
        made.year == year,
        made.month == month,
        made.day == day,
        made.hour == hour,
        made.minute == minute,
        made.second == second,
        made.microsecond == microsecond,
        made.tzinfo is None,
    )
def do_time(int hour, int minute, int second, int microsecond):
    """
    Create a time with ``time_new`` (``None`` as tzinfo) and report, in
    order: whether the result is exactly ``datetime.time``, whether each
    field equals its argument, and whether ``tzinfo`` is ``None``.

    >>> do_time(12, 23, 0, 0)
    (True, True, True, True, True, True)
    """
    made = time_new(hour, minute, second, microsecond, None)
    return (
        type(made) is py_datetime.time,
        made.hour == hour,
        made.minute == minute,
        made.second == second,
        made.microsecond == microsecond,
        made.tzinfo is None,
    )
def do_time_tzinfo(int hour, int minute, int second, int microsecond, object tz):
    """
    Create a time with ``time_new`` using ``tz`` as tzinfo and report, in
    order: whether the result is exactly ``datetime.time``, whether each
    field equals its argument, and whether ``tzinfo`` is the very object
    that was passed in.

    >>> tz = FixedOffset(60*3, 'Moscow')
    >>> do_time_tzinfo(12, 23, 0, 0, tz)
    (True, True, True, True, True, True)
    """
    made = time_new(hour, minute, second, microsecond, tz)
    return (
        type(made) is py_datetime.time,
        made.hour == hour,
        made.minute == minute,
        made.second == second,
        made.microsecond == microsecond,
        made.tzinfo is tz,
    )
def do_datetime_tzinfo(int year, int month, int day,
                       int hour, int minute, int second, int microsecond, object tz):
    """
    Create a datetime with ``datetime_new`` using ``tz`` as tzinfo and
    report, in order: whether the result is exactly ``datetime.datetime``,
    whether each field equals its argument, and whether ``tzinfo`` is the
    very object that was passed in.

    >>> tz = FixedOffset(60*3, 'Moscow')
    >>> do_datetime_tzinfo(2012, 12, 31, 12, 23, 0, 0, tz)
    (True, True, True, True, True, True, True, True, True)
    """
    made = datetime_new(year, month, day, hour, minute, second, microsecond, tz)
    return (
        type(made) is py_datetime.datetime,
        made.year == year,
        made.month == month,
        made.day == day,
        made.hour == hour,
        made.minute == minute,
        made.second == second,
        made.microsecond == microsecond,
        made.tzinfo is tz,
    )
def do_time_tzinfo2(int hour, int minute, int second, int microsecond, object tz):
    """
    Rebuild a time three times from its C-level accessors, switching the
    tzinfo argument None -> tz -> None -> tz, and return eight checks:

    r1, r2 -- first rebuild carries ``tz`` (attribute / ``time_tzinfo()``)
    r3, r4 -- second rebuild has no tzinfo (attribute / ``time_tzinfo()``)
    r5, r6 -- third rebuild carries ``tz`` again
    r7     -- second rebuild equals the initial value
    r8     -- third rebuild equals the first rebuild

    >>> tz = FixedOffset(60*3, 'Moscow')
    >>> do_time_tzinfo2(12, 23, 0, 0, tz)
    (True, True, True, True, True, True, True, True)
    """
    # Initial value, created without tzinfo.
    v = time_new(hour, minute, second, microsecond, None)

    # Same fields, now with tz attached.
    v1 = time_new(
        time_hour(v),
        time_minute(v),
        time_second(v),
        time_microsecond(v),
        tz)
    r1 = (v1.tzinfo == tz)
    r2 = (tz == time_tzinfo(v1))

    # Same fields again, tzinfo dropped.
    v2 = time_new(
        time_hour(v1),
        time_minute(v1),
        time_second(v1),
        time_microsecond(v1),
        None)
    # None is a singleton: test identity rather than equality.
    r3 = (v2.tzinfo is None)
    r4 = (time_tzinfo(v2) is None)

    # And once more with tz attached.
    v3 = time_new(
        time_hour(v2),
        time_minute(v2),
        time_second(v2),
        time_microsecond(v2),
        tz)
    r5 = (v3.tzinfo == tz)
    r6 = (tz == time_tzinfo(v3))

    r7 = (v2 == v)
    r8 = (v3 == v1)
    return r1, r2, r3, r4, r5, r6, r7, r8
def do_datetime_tzinfo2(int year, int month, int day,
                        int hour, int minute, int second, int microsecond, object tz):
    """
    Rebuild a datetime three times from its C-level accessors, switching the
    tzinfo argument None -> tz -> None -> tz, and return eight checks:

    r1, r2 -- first rebuild carries ``tz`` (attribute / ``datetime_tzinfo()``)
    r3, r4 -- second rebuild has no tzinfo (attribute / ``datetime_tzinfo()``)
    r5, r6 -- third rebuild carries ``tz`` again
    r7     -- second rebuild equals the initial value
    r8     -- third rebuild equals the first rebuild

    >>> tz = FixedOffset(60*3, 'Moscow')
    >>> do_datetime_tzinfo2(2012, 12, 31, 12, 23, 0, 0, tz)
    (True, True, True, True, True, True, True, True)
    """
    # Initial value, created without tzinfo.
    v = datetime_new(year, month, day, hour, minute, second, microsecond, None)

    # Same fields, now with tz attached.
    v1 = datetime_new(
        datetime_year(v),
        datetime_month(v),
        datetime_day(v),
        datetime_hour(v),
        datetime_minute(v),
        datetime_second(v),
        datetime_microsecond(v),
        tz)
    r1 = (v1.tzinfo == tz)
    r2 = (tz == datetime_tzinfo(v1))

    # Same fields again, tzinfo dropped.
    v2 = datetime_new(
        datetime_year(v1),
        datetime_month(v1),
        datetime_day(v1),
        datetime_hour(v1),
        datetime_minute(v1),
        datetime_second(v1),
        datetime_microsecond(v1),
        None)
    # None is a singleton: test identity rather than equality.
    r3 = (v2.tzinfo is None)
    r4 = (datetime_tzinfo(v2) is None)

    # And once more with tz attached.
    v3 = datetime_new(
        datetime_year(v2),
        datetime_month(v2),
        datetime_day(v2),
        datetime_hour(v2),
        datetime_minute(v2),
        datetime_second(v2),
        datetime_microsecond(v2),
        tz)
    r5 = (v3.tzinfo == tz)
    r6 = (tz == datetime_tzinfo(v3))

    r7 = (v2 == v)
    r8 = (v3 == v1)
    return r1, r2, r3, r4, r5, r6, r7, r8
# tag: py_unicode_strings
import sys
cimport cython
from libc.string cimport memcpy, strcpy
cdef bint Py_UNICODE_equal(const Py_UNICODE* u1, const Py_UNICODE* u2):
    # Compare two 0-terminated Py_UNICODE buffers element by element.
    # Walk both in lockstep until either hits a 0 element or the elements
    # differ; the buffers are equal iff the elements at the stop position match.
    cdef Py_ssize_t pos = 0
    while u1[pos] != 0 and u2[pos] != 0 and u1[pos] == u2[pos]:
        pos += 1
    return u1[pos] == u2[pos]
# Typedef'd alias of Py_UNICODE*; the tests use it next to the plain pointer.
ctypedef Py_UNICODE* LPWSTR

# Python unicode objects that the C-level values are compared against.
# (uobj1 is not referenced in the visible part of this file.)
cdef unicode uobj = u'unicode\u1234'
cdef unicode uobj1 = u'u'

# C-level Py_UNICODE values initialised directly from unicode literals:
# a plain pointer, a fixed 42-element array (filled by the memcpy below),
# the typedef'd pointer and a pointer to an empty literal.
cdef Py_UNICODE* c_pu_str = u"unicode\u1234"
cdef Py_UNICODE c_pu_arr[42]
cdef LPWSTR c_wstr = u"unicode\u1234"
cdef Py_UNICODE* c_pu_empty = u""
# (c_empty is not referenced in the visible part of this file.)
cdef char* c_empty = ""

# Two code points above U+FFFF, as an object and as a Py_UNICODE* literal;
# test_c_to_python() expects 2 or 4 elements depending on sizeof(Py_UNICODE).
cdef unicode uwide_literal = u'\U00020000\U00020001'
cdef Py_UNICODE* c_pu_wide_literal = u'\U00020000\U00020001'

# Copy len(uobj) elements plus one more from the pointer literal into the
# array -- the extra element is presumably the terminating 0, which the
# len(c_pu_arr) == 8 assertion in test_c_to_python() depends on.
memcpy(c_pu_arr, c_pu_str, sizeof(Py_UNICODE) * (len(uobj) + 1))
def test_c_to_python():
    """
    Check how the module-level Py_UNICODE values (pointer, array and
    typedef'd pointer) behave when used like Python unicode: comparison,
    slicing, indexing, ``len()``, ``sizeof()`` and truth testing.

    >>> test_c_to_python()
    """
    # Whole-value comparison against the unicode object.
    assert c_pu_arr == uobj
    assert c_pu_str == uobj
    assert c_wstr == uobj

    # Slices with an open end.
    assert c_pu_arr[1:] == uobj[1:]
    assert c_pu_str[1:] == uobj[1:]
    assert c_wstr[1:] == uobj[1:]

    # Slices with an open start.
    assert c_pu_arr[:1] == uobj[:1]
    assert c_pu_str[:1] == uobj[:1]
    assert c_wstr[:1] == uobj[:1]

    # Slices bounded on both sides.
    assert c_pu_arr[1:7] == uobj[1:7]
    assert c_pu_str[1:7] == uobj[1:7]
    assert c_wstr[1:7] == uobj[1:7]

    # Single-element indexing.
    assert c_pu_arr[1] == uobj[1]
    assert c_pu_str[1] == uobj[1]
    assert c_wstr[1] == uobj[1]

    # len() counts elements up to the terminator, also for the 42-slot array.
    assert len(c_pu_str) == 8
    assert len(c_pu_arr) == 8
    assert len(c_wstr) == 8

    # sizeof() still sees the C types: full array vs. plain pointer.
    assert sizeof(c_pu_arr) == sizeof(Py_UNICODE) * 42
    assert sizeof(c_pu_str) == sizeof(void*)

    # Literal above U+FFFF: the element count depends on the Py_UNICODE width.
    assert c_pu_wide_literal == uwide_literal
    if sizeof(Py_UNICODE) >= 4:
        assert len(c_pu_wide_literal) == 2
    else:
        assert len(c_pu_wide_literal) == 4
        if sys.version_info >= (3, 3):
            # Make sure len(unicode) is not reverted to pre-3.3 behavior
            assert len(uwide_literal) == 2

    # Truth testing: empty unicode literal is false, while the pointers
    # (including the one to the empty literal) are true.
    assert u'unicode'
    assert not u''
    assert c_pu_str
    assert c_pu_empty
def test_python_to_c():
    """
    >>> test_python_to_c()
    """
    # Typed local, so the values passed below are statically known unicode.
    cdef unicode u

    # unicode objects are passed where Py_UNICODE_equal() declares
    # const Py_UNICODE* parameters, once also through an explicit LPWSTR cast.
    assert Py_UNICODE_equal(c_pu_arr, uobj)
    assert Py_UNICODE_equal(c_pu_str, uobj)
    assert Py_UNICODE_equal(c_pu_str, <LPWSTR>uobj)

    # Tail slice compared against the pointers advanced by one element.
    u = uobj[1:]
    assert Py_UNICODE_equal(c_pu_str + 1, u)
    assert Py_UNICODE_equal(c_wstr + 1, u)

    # Head slice, inner slice and single index compared against literals
    # cast to Py_UNICODE*.
    u = uobj[:1]
    assert Py_UNICODE_equal(<Py_UNICODE*>u"u", u)
    u = uobj[1:7]
    assert Py_UNICODE_equal(<Py_UNICODE*>u"nicode", u)
    u = uobj[1]
    assert Py_UNICODE_equal(<Py_UNICODE*>u"n", u)

    # Wide literal: object on one side, pointer on the other.
    assert Py_UNICODE_equal(uwide_literal, <Py_UNICODE*>c_pu_wide_literal)

    # As an object the literal keeps its trailing NUL (length 4); as a
    # Py_UNICODE* its length stops at the first 0 element (length 3).
    assert len(u"abc\0") == 4
    assert len(<Py_UNICODE*>u"abc\0") == 3
# mode: run
# tag: global, nameerror
try:
from heapq import * # just to confuse the compiler
except ImportError:
pass
def f(a):
    """
    >>> f(1)
    Traceback (most recent call last):
    NameError: global name 'definitely_unknown_name' is not defined
    """
    # Rebind the argument to the function itself; the value is not used again.
    a = f
    # No definition of this name appears in the visible source, so this
    # lookup is what should raise the NameError shown in the doctest.
    a = definitely_unknown_name
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment