Commit 14986aea authored by Stefan Behnel

use a dedicated UnicodeType and UnicodeNode to represent unicode literals

fixes the unicode literal indexing problem (only for unicode strings, not for byte strings!)
parent 0227fc22
......@@ -738,6 +738,29 @@ class StringNode(ConstNode):
return self.entry.cname
class UnicodeNode(PyConstNode):
    """Expression node for a unicode string literal.

    The node's type is ``PyrexTypes.c_unicode_type``. During type analysis
    the literal's value is passed to the scope's ``add_string_const`` and the
    resulting entry to ``add_py_string``; the node's result code is that
    entry's ``pystring_cname``.
    """
    #  entry   Symtab.Entry    set by analyse_types()

    type = PyrexTypes.c_unicode_type

    def analyse_types(self, env):
        # Keep the entry returned by the scope: both the result code and
        # _coerce_to() read it later.
        self.entry = env.add_string_const(self.value)
        env.add_py_string(self.entry)

    def calculate_result_code(self):
        # The result is the C name stored on the entry for the Python string.
        return self.entry.pystring_cname

    def _coerce_to(self, dst_type, env):
        # NOTE(review): the leading underscore means this is not the regular
        # coerce_to() hook -- presumably disabled on purpose; confirm before
        # renaming it.
        if not dst_type.is_pyobject:
            # The entry lives on the instance; a bare ``entry`` name is not
            # defined in this method and would raise NameError.
            node = StringNode(self.pos, entry = self.entry, type = py_object_type)
            return ConstNode.coerce_to(node, dst_type, env)
        else:
            return self
        # NOTE(review): the remark below sits after both returns and so
        # describes no statement here; it looks carried over from another
        # coerce_to implementation -- confirm whether it still applies.
        # We still need to perform normal coerce_to processing on the
        # result, because we might be coercing to an extension type,
        # in which case a type test node will be needed.
class IdentifierStringNode(ConstNode):
# A Python string that behaves like an identifier, e.g. for
# keyword arguments in a call, or for imported names
......
......@@ -492,6 +492,8 @@ def p_atom(s):
kind, value = p_cat_string_literal(s)
if kind == 'c':
return ExprNodes.CharNode(pos, value = value)
elif kind == 'u':
return ExprNodes.UnicodeNode(pos, value = value)
else:
return ExprNodes.StringNode(pos, value = value)
elif sy == 'IDENT':
......
......@@ -998,20 +998,6 @@ class CStringType:
return '"%s"' % Utils.escape_byte_string(value)
class CUTF8StringType:
    """Mixin class for C unicode types.

    Supplies the string/unicode flags, the name of the C function used for
    conversion to a Python object, the exception value, and the rendering
    of literal values as quoted C string literals.
    """

    exception_value = "NULL"
    to_py_function = "PyUnicode_DecodeUTF8"
    is_unicode = 1
    is_string = 1

    def literal_code(self, value):
        # Only str values are accepted; the text is escaped by
        # Utils.escape_byte_string and wrapped in double quotes.
        assert isinstance(value, str)
        escaped = Utils.escape_byte_string(value)
        return '"%s"' % escaped
class CCharArrayType(CStringType, CArrayType):
# C 'char []' type.
......@@ -1022,16 +1008,6 @@ class CCharArrayType(CStringType, CArrayType):
CArrayType.__init__(self, c_char_type, size)
class CUTF8CharArrayType(CUTF8StringType, CArrayType):
    """C 'char []' type combined with the CUTF8StringType mixin."""

    pymemberdef_typecode = "T_STRING_INPLACE"
    parsetuple_format = "s"

    def __init__(self, size):
        # The element type is always c_char_type; only the size varies.
        CArrayType.__init__(self, c_char_type, size)
class CCharPtrType(CStringType, CPtrType):
# C 'char *' type.
......@@ -1042,6 +1018,29 @@ class CCharPtrType(CStringType, CPtrType):
CPtrType.__init__(self, c_char_type)
class UnicodeType(BuiltinObjectType):
    """The Python unicode type.

    Initialised as the builtin object type named "unicode" with
    "PyUnicodeObject" as its second constructor argument, and flagged as
    both a string type and a unicode type.
    """

    is_string = 1
    is_unicode = 1
    parsetuple_format = "O"

    def __init__(self):
        BuiltinObjectType.__init__(self, "unicode", "PyUnicodeObject")

    def literal_code(self, value):
        # Only str values are accepted; the text is escaped by
        # Utils.escape_byte_string and wrapped in double quotes.
        assert isinstance(value, str)
        escaped = Utils.escape_byte_string(value)
        return '"%s"' % escaped

    def declaration_code(self, entity_code,
            for_display = 0, dll_linkage = None, pyrex = 0):
        # Outside of pyrex/display mode the entity is declared as a C
        # char array; otherwise the type's own name is used.
        if not (pyrex or for_display):
            c_base = public_decl("char", dll_linkage)
            return "%s %s[]" % (c_base, entity_code)
        return self.base_declaration_code(self.name, entity_code)
class ErrorType(PyrexType):
# Used to prevent propagation of error messages.
......@@ -1106,7 +1105,7 @@ c_longdouble_type = CFloatType(8, typestring="g")
# Module-level type instances, created once when this module is loaded.
c_null_ptr_type = CNullPtrType(c_void_type)
# The char array types are instantiated with a size of None.
c_char_array_type = CCharArrayType(None)
c_utf8_char_array_type = CUTF8CharArrayType(None)
# Instance of UnicodeType (the Python unicode type).
c_unicode_type = UnicodeType()
c_char_ptr_type = CCharPtrType()
# Pointer types wrapping existing type instances.
c_char_ptr_ptr_type = CPtrType(c_char_ptr_type)
c_py_ssize_t_ptr_type = CPtrType(c_py_ssize_t_type)
......
......@@ -504,7 +504,7 @@ class Scope:
else:
cname = self.new_const_cname()
if value.is_unicode:
c_type = PyrexTypes.c_utf8_char_array_type
c_type = PyrexTypes.c_unicode_type
value = value.utf8encode()
else:
c_type = PyrexTypes.c_char_array_type
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment