Commit a664239a authored by Nikita Nemkin

Basic support for Py_UNICODE* strings.

parent 0d651b18
...@@ -778,6 +778,16 @@ class StringConst(object): ...@@ -778,6 +778,16 @@ class StringConst(object):
self.py_strings[key] = py_string self.py_strings[key] = py_string
return py_string return py_string
class UnicodeConst(object):
    """Bookkeeping record for one Py_UNICODE[] constant owned by GlobalState.
    """
    # cname    string                     name under which the constant is emitted
    # text     EncodedString (unicode)    contents of the constant

    def __init__(self, cname, text):
        self.cname, self.text = cname, text
class PyStringConst(object): class PyStringConst(object):
"""Global info about a Python string constant held by GlobalState. """Global info about a Python string constant held by GlobalState.
""" """
...@@ -873,6 +883,7 @@ class GlobalState(object): ...@@ -873,6 +883,7 @@ class GlobalState(object):
self.const_cname_counter = 1 self.const_cname_counter = 1
self.string_const_index = {} self.string_const_index = {}
self.unicode_const_index = {}
self.int_const_index = {} self.int_const_index = {}
self.py_constants = [] self.py_constants = []
...@@ -1016,6 +1027,16 @@ class GlobalState(object): ...@@ -1016,6 +1027,16 @@ class GlobalState(object):
c.add_py_version(py_version) c.add_py_version(py_version)
return c return c
def get_unicode_const(self, text):
    """Return the UnicodeConst registered for ``text``.

    A new constant (with a freshly allocated cname) is created and
    registered the first time a given text is requested; later requests
    for an equal text return the same object.
    """
    assert text.is_unicode
    index = self.unicode_const_index
    if text not in index:
        # The cname is only allocated on a miss, so repeated lookups do
        # not consume names from the constant counter.
        index[text] = UnicodeConst(self.new_const_cname(), text)
    return index[text]
def get_py_string_const(self, text, identifier=None, def get_py_string_const(self, text, identifier=None,
is_str=False, unicode_value=None): is_str=False, unicode_value=None):
# return a Python string constant, creating a new one if necessary # return a Python string constant, creating a new one if necessary
...@@ -1141,6 +1162,10 @@ class GlobalState(object): ...@@ -1141,6 +1162,10 @@ class GlobalState(object):
for py_string in c.py_strings.values(): for py_string in c.py_strings.values():
py_strings.append((c.cname, len(py_string.cname), py_string)) py_strings.append((c.cname, len(py_string.cname), py_string))
for c in self.unicode_const_index.values():
decls_writer.putln('static Py_UNICODE %s[] = { %s };' % (
c.cname, StringEncoding.encode_py_unicode_string(c.text)))
if py_strings: if py_strings:
self.use_utility_code(UtilityCode.load_cached("InitStrings", "StringTools.c")) self.use_utility_code(UtilityCode.load_cached("InitStrings", "StringTools.c"))
py_strings.sort() py_strings.sort()
...@@ -1435,6 +1460,9 @@ class CCodeWriter(object): ...@@ -1435,6 +1460,9 @@ class CCodeWriter(object):
def get_string_const(self, text): def get_string_const(self, text):
return self.globalstate.get_string_const(text).cname return self.globalstate.get_string_const(text).cname
def get_unicode_const(self, text):
    """Return the cname of the global Py_UNICODE[] constant for ``text``."""
    const = self.globalstate.get_unicode_const(text)
    return const.cname
def get_py_string_const(self, text, identifier=None, def get_py_string_const(self, text, identifier=None,
is_str=False, unicode_value=None): is_str=False, unicode_value=None):
return self.globalstate.get_py_string_const( return self.globalstate.get_py_string_const(
......
...@@ -63,14 +63,16 @@ coercion_error_dict = { ...@@ -63,14 +63,16 @@ coercion_error_dict = {
# string related errors # string related errors
(Builtin.unicode_type, Builtin.bytes_type) : "Cannot convert Unicode string to 'bytes' implicitly, encoding required.", (Builtin.unicode_type, Builtin.bytes_type) : "Cannot convert Unicode string to 'bytes' implicitly, encoding required.",
(Builtin.unicode_type, Builtin.str_type) : "Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.", (Builtin.unicode_type, Builtin.str_type) : "Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.",
(Builtin.unicode_type, PyrexTypes.c_char_ptr_type) : "Unicode objects do not support coercion to C types.", (Builtin.unicode_type, PyrexTypes.c_char_ptr_type) : "Unicode objects only support coercion to Py_UNICODE*.",
(Builtin.unicode_type, PyrexTypes.c_uchar_ptr_type) : "Unicode objects do not support coercion to C types.", (Builtin.unicode_type, PyrexTypes.c_uchar_ptr_type) : "Unicode objects only support coercion to Py_UNICODE*.",
(Builtin.bytes_type, Builtin.unicode_type) : "Cannot convert 'bytes' object to unicode implicitly, decoding required", (Builtin.bytes_type, Builtin.unicode_type) : "Cannot convert 'bytes' object to unicode implicitly, decoding required",
(Builtin.bytes_type, Builtin.str_type) : "Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.", (Builtin.bytes_type, Builtin.str_type) : "Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.",
(Builtin.bytes_type, PyrexTypes.c_py_unicode_ptr_type) : "Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.",
(Builtin.str_type, Builtin.unicode_type) : "str objects do not support coercion to unicode, use a unicode string literal instead (u'')", (Builtin.str_type, Builtin.unicode_type) : "str objects do not support coercion to unicode, use a unicode string literal instead (u'')",
(Builtin.str_type, Builtin.bytes_type) : "Cannot convert 'str' to 'bytes' implicitly. This is not portable.", (Builtin.str_type, Builtin.bytes_type) : "Cannot convert 'str' to 'bytes' implicitly. This is not portable.",
(Builtin.str_type, PyrexTypes.c_char_ptr_type) : "'str' objects do not support coercion to C types (use 'bytes'?).", (Builtin.str_type, PyrexTypes.c_char_ptr_type) : "'str' objects do not support coercion to C types (use 'bytes'?).",
(Builtin.str_type, PyrexTypes.c_uchar_ptr_type) : "'str' objects do not support coercion to C types (use 'bytes'?).", (Builtin.str_type, PyrexTypes.c_uchar_ptr_type) : "'str' objects do not support coercion to C types (use 'bytes'?).",
(Builtin.str_type, PyrexTypes.c_py_unicode_ptr_type) : "'str' objects do not support coercion to C types (use 'unicode'?).",
(PyrexTypes.c_char_ptr_type, Builtin.unicode_type) : "Cannot convert 'char*' to unicode implicitly, decoding required", (PyrexTypes.c_char_ptr_type, Builtin.unicode_type) : "Cannot convert 'char*' to unicode implicitly, decoding required",
(PyrexTypes.c_uchar_ptr_type, Builtin.unicode_type) : "Cannot convert 'char*' to unicode implicitly, decoding required", (PyrexTypes.c_uchar_ptr_type, Builtin.unicode_type) : "Cannot convert 'char*' to unicode implicitly, decoding required",
} }
...@@ -1171,8 +1173,8 @@ class BytesNode(ConstNode): ...@@ -1171,8 +1173,8 @@ class BytesNode(ConstNode):
return self.result_code return self.result_code
class UnicodeNode(PyConstNode): class UnicodeNode(ConstNode):
# A Python unicode object # A Py_UNICODE* or unicode literal
# #
# value EncodedString # value EncodedString
# bytes_value BytesLiteral the literal parsed as bytes string ('-3' unicode literals only) # bytes_value BytesLiteral the literal parsed as bytes string ('-3' unicode literals only)
...@@ -1213,7 +1215,11 @@ class UnicodeNode(PyConstNode): ...@@ -1213,7 +1215,11 @@ class UnicodeNode(PyConstNode):
if dst_type.is_string and self.bytes_value is not None: if dst_type.is_string and self.bytes_value is not None:
# special case: '-3' enforced unicode literal used in a C char* context # special case: '-3' enforced unicode literal used in a C char* context
return BytesNode(self.pos, value=self.bytes_value).coerce_to(dst_type, env) return BytesNode(self.pos, value=self.bytes_value).coerce_to(dst_type, env)
error(self.pos, "Unicode literals do not support coercion to C types other than Py_UNICODE or Py_UCS4.") if dst_type.is_unicode:
node = UnicodeNode(self.pos, value=self.value)
node.type = dst_type
return node
error(self.pos, "Unicode literals do not support coercion to C types other than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* (for strings).")
elif dst_type is not py_object_type: elif dst_type is not py_object_type:
if not self.check_for_coercion_error(dst_type, env): if not self.check_for_coercion_error(dst_type, env):
self.fail_assignment(dst_type) self.fail_assignment(dst_type)
...@@ -1225,11 +1231,20 @@ class UnicodeNode(PyConstNode): ...@@ -1225,11 +1231,20 @@ class UnicodeNode(PyConstNode):
## and (0xD800 <= self.value[0] <= 0xDBFF) ## and (0xD800 <= self.value[0] <= 0xDBFF)
## and (0xDC00 <= self.value[1] <= 0xDFFF)) ## and (0xDC00 <= self.value[1] <= 0xDFFF))
def coerce_to_boolean(self, env):
    """Fold the literal into a constant BoolNode.

    The result is the truth value of ``self.value``; it is passed as both
    the node's value and its constant_result.
    """
    truth = True if self.value else False
    return BoolNode(self.pos, value=truth, constant_result=truth)
def contains_surrogates(self): def contains_surrogates(self):
return _string_contains_surrogates(self.value) return _string_contains_surrogates(self.value)
def generate_evaluation_code(self, code):
    """Pick the global constant that backs this literal.

    Python-object literals use a Python string constant; any other type
    uses a Py_UNICODE[] constant, after a level-1 warning if the text
    contains surrogates.
    """
    if self.type.is_pyobject:
        self.result_code = code.get_py_string_const(self.value)
        return
    if self.contains_surrogates():
        warning(self.pos, "Py_UNICODE* literals with characters outside BMP are not portable.", level=1)
    self.result_code = code.get_unicode_const(self.value)
def calculate_result_code(self): def calculate_result_code(self):
return self.result_code return self.result_code
...@@ -2633,6 +2648,9 @@ class IndexNode(ExprNode): ...@@ -2633,6 +2648,9 @@ class IndexNode(ExprNode):
if base_type.is_string: if base_type.is_string:
# sliced C strings must coerce to Python # sliced C strings must coerce to Python
return bytes_type return bytes_type
elif base_type.is_unicode:
# sliced Py_UNICODE* strings must coerce to Python
return unicode_type
elif base_type in (unicode_type, bytes_type, str_type, list_type, tuple_type): elif base_type in (unicode_type, bytes_type, str_type, list_type, tuple_type):
# slicing these returns the same type # slicing these returns the same type
return base_type return base_type
...@@ -3446,6 +3464,8 @@ class SliceIndexNode(ExprNode): ...@@ -3446,6 +3464,8 @@ class SliceIndexNode(ExprNode):
base_type = self.base.infer_type(env) base_type = self.base.infer_type(env)
if base_type.is_string or base_type.is_cpp_class: if base_type.is_string or base_type.is_cpp_class:
return bytes_type return bytes_type
elif base_type.is_unicode:
return unicode_type
elif base_type in (bytes_type, str_type, unicode_type, elif base_type in (bytes_type, str_type, unicode_type,
list_type, tuple_type): list_type, tuple_type):
return base_type return base_type
...@@ -3510,6 +3530,8 @@ class SliceIndexNode(ExprNode): ...@@ -3510,6 +3530,8 @@ class SliceIndexNode(ExprNode):
base_type = self.base.type base_type = self.base.type
if base_type.is_string or base_type.is_cpp_string: if base_type.is_string or base_type.is_cpp_string:
self.type = default_str_type(env) self.type = default_str_type(env)
elif base_type.is_unicode:
self.type = unicode_type
elif base_type.is_ptr: elif base_type.is_ptr:
self.type = base_type self.type = base_type
elif base_type.is_array: elif base_type.is_array:
...@@ -3578,6 +3600,27 @@ class SliceIndexNode(ExprNode): ...@@ -3578,6 +3600,27 @@ class SliceIndexNode(ExprNode):
stop_code, stop_code,
start_code, start_code,
code.error_goto_if_null(result, self.pos))) code.error_goto_if_null(result, self.pos)))
elif self.base.type.is_unicode:
base_result = self.base.result()
if self.base.type != PyrexTypes.c_py_unicode_ptr_type:
base_result = '((const Py_UNICODE*)%s)' % base_result
if self.stop is None:
code.putln(
"%s = __Pyx_PyUnicode_FromUnicode(%s + %s); %s" % (
result,
base_result,
start_code,
code.error_goto_if_null(result, self.pos)))
else:
code.putln(
"%s = __Pyx_PyUnicode_FromUnicodeAndLength(%s + %s, %s - %s); %s" % (
result,
base_result,
start_code,
stop_code,
start_code,
code.error_goto_if_null(result, self.pos)))
elif self.base.type is unicode_type: elif self.base.type is unicode_type:
code.globalstate.use_utility_code( code.globalstate.use_utility_code(
UtilityCode.load_cached("PyUnicode_Substring", "StringTools.c")) UtilityCode.load_cached("PyUnicode_Substring", "StringTools.c"))
...@@ -4903,11 +4946,11 @@ class AttributeNode(ExprNode): ...@@ -4903,11 +4946,11 @@ class AttributeNode(ExprNode):
self.is_py_attr = 0 self.is_py_attr = 0
self.member = self.attribute self.member = self.attribute
if obj_type is None: if obj_type is None:
if self.obj.type.is_string: if self.obj.type.is_string or self.obj.type.is_unicode:
self.obj = self.obj.coerce_to_pyobject(env) self.obj = self.obj.coerce_to_pyobject(env)
obj_type = self.obj.type obj_type = self.obj.type
else: else:
if obj_type.is_string: if obj_type.is_string or obj_type.is_unicode:
obj_type = py_object_type obj_type = py_object_type
if obj_type.is_ptr or obj_type.is_array: if obj_type.is_ptr or obj_type.is_array:
obj_type = obj_type.base_type obj_type = obj_type.base_type
...@@ -8337,8 +8380,12 @@ class BinopNode(ExprNode): ...@@ -8337,8 +8380,12 @@ class BinopNode(ExprNode):
if self.is_py_operation_types(type1, type2): if self.is_py_operation_types(type1, type2):
if type2.is_string: if type2.is_string:
type2 = Builtin.bytes_type type2 = Builtin.bytes_type
elif type2.is_unicode:
type2 = Builtin.unicode_type
if type1.is_string: if type1.is_string:
type1 = Builtin.bytes_type type1 = Builtin.bytes_type
elif type1.is_unicode:
type1 = Builtin.unicode_type
elif self.operator == '%' \ elif self.operator == '%' \
and type1 in (Builtin.str_type, Builtin.unicode_type): and type1 in (Builtin.str_type, Builtin.unicode_type):
# note that b'%s' % b'abc' doesn't work in Py3 # note that b'%s' % b'abc' doesn't work in Py3
...@@ -8587,7 +8634,7 @@ class AddNode(NumBinopNode): ...@@ -8587,7 +8634,7 @@ class AddNode(NumBinopNode):
# '+' operator. # '+' operator.
def is_py_operation_types(self, type1, type2): def is_py_operation_types(self, type1, type2):
if type1.is_string and type2.is_string: if type1.is_string and type2.is_string or type1.is_unicode and type2.is_unicode:
return 1 return 1
else: else:
return NumBinopNode.is_py_operation_types(self, type1, type2) return NumBinopNode.is_py_operation_types(self, type1, type2)
...@@ -9950,7 +9997,7 @@ class CoerceToPyTypeNode(CoercionNode): ...@@ -9950,7 +9997,7 @@ class CoerceToPyTypeNode(CoercionNode):
# be specific about some known types # be specific about some known types
if arg.type.is_string or arg.type.is_cpp_string: if arg.type.is_string or arg.type.is_cpp_string:
self.type = default_str_type(env) self.type = default_str_type(env)
elif arg.type.is_unicode_char: elif arg.type.is_unicode or arg.type.is_unicode_char:
self.type = unicode_type self.type = unicode_type
elif arg.type.is_complex: elif arg.type.is_complex:
self.type = Builtin.complex_type self.type = Builtin.complex_type
...@@ -10065,13 +10112,13 @@ class CoerceFromPyTypeNode(CoercionNode): ...@@ -10065,13 +10112,13 @@ class CoerceFromPyTypeNode(CoercionNode):
if not result_type.create_from_py_utility_code(env): if not result_type.create_from_py_utility_code(env):
error(arg.pos, error(arg.pos,
"Cannot convert Python object to '%s'" % result_type) "Cannot convert Python object to '%s'" % result_type)
if self.type.is_string: if self.type.is_string or self.type.is_unicode:
if self.arg.is_ephemeral(): if self.arg.is_ephemeral():
error(arg.pos, error(arg.pos,
"Obtaining char* from temporary Python value") "Obtaining '%s' from temporary Python value" % result_type)
elif self.arg.is_name and self.arg.entry and self.arg.entry.is_pyglobal: elif self.arg.is_name and self.arg.entry and self.arg.entry.is_pyglobal:
warning(arg.pos, warning(arg.pos,
"Obtaining char* from externally modifiable global Python value", "Obtaining '%s' from externally modifiable global Python value" % result_type,
level=1) level=1)
def analyse_types(self, env): def analyse_types(self, env):
......
...@@ -1977,6 +1977,11 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform): ...@@ -1977,6 +1977,11 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_char_ptr_type, None) PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_char_ptr_type, None)
]) ])
# C function type for the Py_UNICODE* length helper: a single Py_UNICODE*
# argument (named "unicode") and a size_t result.
Pyx_Py_UNICODE_strlen_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_size_t_type, [
PyrexTypes.CFuncTypeArg("unicode", PyrexTypes.c_py_unicode_ptr_type, None)
])
PyObject_Size_func_type = PyrexTypes.CFuncType( PyObject_Size_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_py_ssize_t_type, [ PyrexTypes.c_py_ssize_t_type, [
PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None) PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None)
...@@ -1996,7 +2001,8 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform): ...@@ -1996,7 +2001,8 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
_ext_types_with_pysize = set(["cpython.array.array"]) _ext_types_with_pysize = set(["cpython.array.array"])
def _handle_simple_function_len(self, node, pos_args): def _handle_simple_function_len(self, node, pos_args):
"""Replace len(char*) by the equivalent call to strlen() and """Replace len(char*) by the equivalent call to strlen(),
len(Py_UNICODE*) by the equivalent Py_UNICODE_strlen() and
len(known_builtin_type) by an equivalent C-API call. len(known_builtin_type) by an equivalent C-API call.
""" """
if len(pos_args) != 1: if len(pos_args) != 1:
...@@ -2011,6 +2017,12 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform): ...@@ -2011,6 +2017,12 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
args = [arg], args = [arg],
is_temp = node.is_temp, is_temp = node.is_temp,
utility_code = UtilityCode.load_cached("IncludeStringH", "StringTools.c")) utility_code = UtilityCode.load_cached("IncludeStringH", "StringTools.c"))
elif arg.type.is_unicode:
new_node = ExprNodes.PythonCapiCallNode(
node.pos, "__Pyx_Py_UNICODE_strlen", self.Pyx_Py_UNICODE_strlen_func_type,
args = [arg],
is_temp = node.is_temp,
utility_code = UtilityCode.load_cached("py_unicode_strlen", "StringTools.c"))
elif arg.type.is_pyobject: elif arg.type.is_pyobject:
cfunc_name = self._map_to_capi_len_function(arg.type) cfunc_name = self._map_to_capi_len_function(arg.type)
if cfunc_name is None: if cfunc_name is None:
......
...@@ -145,6 +145,7 @@ class PyrexType(BaseType): ...@@ -145,6 +145,7 @@ class PyrexType(BaseType):
# is_enum boolean Is a C enum type # is_enum boolean Is a C enum type
# is_typedef boolean Is a typedef type # is_typedef boolean Is a typedef type
# is_string boolean Is a C char * type # is_string boolean Is a C char * type
# is_unicode boolean Is a C Py_UNICODE * type
# is_cpp_string boolean Is a C++ std::string type # is_cpp_string boolean Is a C++ std::string type
# is_unicode_char boolean Is either Py_UCS4 or Py_UNICODE # is_unicode_char boolean Is either Py_UCS4 or Py_UNICODE
# is_returncode boolean Is used only to signal exceptions # is_returncode boolean Is used only to signal exceptions
...@@ -202,6 +203,7 @@ class PyrexType(BaseType): ...@@ -202,6 +203,7 @@ class PyrexType(BaseType):
is_enum = 0 is_enum = 0
is_typedef = 0 is_typedef = 0
is_string = 0 is_string = 0
is_unicode = 0
is_unicode_char = 0 is_unicode_char = 0
is_returncode = 0 is_returncode = 0
is_error = 0 is_error = 0
...@@ -871,7 +873,7 @@ class PyObjectType(PyrexType): ...@@ -871,7 +873,7 @@ class PyObjectType(PyrexType):
def assignable_from(self, src_type): def assignable_from(self, src_type):
# except for pointers, conversion will be attempted # except for pointers, conversion will be attempted
return not src_type.is_ptr or src_type.is_string return not src_type.is_ptr or src_type.is_string or src_type.is_unicode
def declaration_code(self, entity_code, def declaration_code(self, entity_code,
for_display = 0, dll_linkage = None, pyrex = 0): for_display = 0, dll_linkage = None, pyrex = 0):
...@@ -1161,7 +1163,7 @@ class CType(PyrexType): ...@@ -1161,7 +1163,7 @@ class CType(PyrexType):
def error_condition(self, result_code): def error_condition(self, result_code):
conds = [] conds = []
if self.is_string: if self.is_string or self.is_unicode:
conds.append("(!%s)" % result_code) conds.append("(!%s)" % result_code)
elif self.exception_value is not None: elif self.exception_value is not None:
conds.append("(%s == (%s)%s)" % (result_code, self.sign_and_name(), self.exception_value)) conds.append("(%s == (%s)%s)" % (result_code, self.sign_and_name(), self.exception_value))
...@@ -2178,6 +2180,9 @@ class CPointerBaseType(CType): ...@@ -2178,6 +2180,9 @@ class CPointerBaseType(CType):
if base_type.same_as(char_type): if base_type.same_as(char_type):
self.is_string = 1 self.is_string = 1
break break
else:
if base_type.same_as(c_py_unicode_type):
self.is_unicode = 1
if self.is_string and not base_type.is_error: if self.is_string and not base_type.is_error:
if base_type.signed: if base_type.signed:
...@@ -2189,10 +2194,17 @@ class CPointerBaseType(CType): ...@@ -2189,10 +2194,17 @@ class CPointerBaseType(CType):
if self.is_ptr: if self.is_ptr:
self.from_py_function = "__Pyx_PyObject_AsUString" self.from_py_function = "__Pyx_PyObject_AsUString"
self.exception_value = "NULL" self.exception_value = "NULL"
elif self.is_unicode and not base_type.is_error:
self.to_py_function = "__Pyx_PyUnicode_FromUnicode"
if self.is_ptr:
self.from_py_function = "__Pyx_PyUnicode_AsUnicode"
self.exception_value = "NULL"
def py_type_name(self): def py_type_name(self):
if self.is_string: if self.is_string:
return "bytes" return "bytes"
elif self.is_unicode:
return "unicode"
else: else:
return super(CPointerBaseType, self).py_type_name() return super(CPointerBaseType, self).py_type_name()
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
import re import re
import sys import sys
import array
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
_unicode, _str, _bytes = str, str, bytes _unicode, _str, _bytes = str, str, bytes
...@@ -262,3 +263,14 @@ def split_string_literal(s, limit=2000): ...@@ -262,3 +263,14 @@ def split_string_literal(s, limit=2000):
chunks.append(s[start:end]) chunks.append(s[start:end])
start = end start = end
return '""'.join(chunks) return '""'.join(chunks)
def encode_py_unicode_string(s):
    """Create Py_UNICODE[] representation of a given unicode string.

    Returns the UTF-16 code units of ``s`` as comma-separated decimal
    numbers, followed by a terminating 0, ready to be placed between the
    braces of a C array initializer (e.g. u"ab" -> "97,98,0").
    """
    # Non-BMP characters will appear as surrogates, which is not compatible with
    # wide (UTF-32) Python builds. UnicodeNode will warn the user about this.
    #
    # The generic 'UTF-16' codec emits a BOM followed by code units in native
    # byte order, which is exactly how array('H') interprets the bytes.
    units = array.array('H', s.encode('UTF-16'))
    units.pop(0)     # Remove BOM
    units.append(0)  # Add NULL terminator
    # Format with '%d' rather than the Python 2-only builtin ``unicode`` so
    # that this also works when the compiler runs under Python 3.
    return u",".join([u"%d" % unit for unit in units])
...@@ -604,3 +604,17 @@ static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t i ...@@ -604,3 +604,17 @@ static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t i
index += PyBytes_GET_SIZE(bytes); index += PyBytes_GET_SIZE(bytes);
return PyBytes_AS_STRING(bytes)[index]; return PyBytes_AS_STRING(bytes)[index];
} }
/////////////// py_unicode_strlen.proto ///////////////
/* Number of Py_UNICODE units before the terminating zero of `u`.
 * For PY_VERSION_HEX >= 0x03000000 this is an alias for Py_UNICODE_strlen;
 * older versions use the local implementation below. */
#if PY_VERSION_HEX >= 0x03000000
#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen
#else
static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u)
{
    size_t length = 0;
    while (u[length])
        length++;
    return length;
}
#endif
/////////////// TypeConversions.proto /////////////// /////////////// TypeConversions.proto ///////////////
// @requires: py_unicode_strlen
/* Type Conversion Predeclarations */ /* Type Conversion Predeclarations */
...@@ -24,6 +25,14 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*); ...@@ -24,6 +25,14 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
#define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s) #define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s)
#define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s) #define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s)
/* Py_UNICODE* -> unicode object, with the length taken from the zero terminator.
 * NOTE: the macro argument `u` is expanded twice, so it must not have side effects. */
#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
/* Py_UNICODE* + explicit length -> unicode object. */
#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
/* unicode object -> Py_UNICODE*; the C-API accessor is selected by CYTHON_PEP393_ENABLED. */
#if CYTHON_PEP393_ENABLED
#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
#else
#define __Pyx_PyUnicode_AsUnicode PyUnicode_AS_UNICODE
#endif
#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None) #define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False)) #define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment