Commit bf558ecf authored by Stefan Behnel

support redundant parsing of string literals as unicode *and* bytes string,...

support redundant parsing of string literals as unicode *and* bytes string, fix 'str' literal assignments to char* targets when using Future.unicode_literals
parent 184482af
...@@ -961,8 +961,10 @@ class BytesNode(ConstNode): ...@@ -961,8 +961,10 @@ class BytesNode(ConstNode):
class UnicodeNode(PyConstNode): class UnicodeNode(PyConstNode):
# A Python unicode object # A Python unicode object
# #
# value EncodedString # value EncodedString
# bytes_value BytesLiteral the literal parsed as bytes string ('-3' unicode literals only)
bytes_value = None
type = unicode_type type = unicode_type
def coerce_to(self, dst_type, env): def coerce_to(self, dst_type, env):
...@@ -975,6 +977,9 @@ class UnicodeNode(PyConstNode): ...@@ -975,6 +977,9 @@ class UnicodeNode(PyConstNode):
int_value = ord(self.value) int_value = ord(self.value)
return IntNode(self.pos, value=int_value, constant_result=int_value) return IntNode(self.pos, value=int_value, constant_result=int_value)
elif not dst_type.is_pyobject: elif not dst_type.is_pyobject:
if dst_type.is_string and self.bytes_value is not None:
# special case: '-3' enforced unicode literal used in a C char* context
return BytesNode(self.pos, value=self.bytes_value).coerce_to(dst_type, env)
error(self.pos, "Unicode literals do not support coercion to C types other than Py_UNICODE.") error(self.pos, "Unicode literals do not support coercion to C types other than Py_UNICODE.")
elif dst_type is not py_object_type: elif dst_type is not py_object_type:
if not self.check_for_coercion_error(dst_type): if not self.check_for_coercion_error(dst_type):
...@@ -1015,11 +1020,13 @@ class StringNode(PyConstNode): ...@@ -1015,11 +1020,13 @@ class StringNode(PyConstNode):
# A Python str object, i.e. a byte string in Python 2.x and a # A Python str object, i.e. a byte string in Python 2.x and a
# unicode string in Python 3.x # unicode string in Python 3.x
# #
# value BytesLiteral or EncodedString # value BytesLiteral
# unicode_value EncodedString
# is_identifier boolean # is_identifier boolean
type = str_type type = str_type
is_identifier = None is_identifier = None
unicode_value = None
def coerce_to(self, dst_type, env): def coerce_to(self, dst_type, env):
if dst_type is not py_object_type and not str_type.subtype_of(dst_type): if dst_type is not py_object_type and not str_type.subtype_of(dst_type):
......
This diff is collapsed.
...@@ -44,9 +44,15 @@ class UnicodeLiteralBuilder(object): ...@@ -44,9 +44,15 @@ class UnicodeLiteralBuilder(object):
def append_charval(self, char_number): def append_charval(self, char_number):
self.chars.append( unichr(char_number) ) self.chars.append( unichr(char_number) )
# Handle an escape sequence for the unicode builder: only the character
# code is used (forwarded to append_charval); escape_string is accepted
# but not used by this implementation.
def append_uescape(self, char_number, escape_string):
self.append_charval(char_number)
def getstring(self): def getstring(self):
return EncodedString(u''.join(self.chars)) return EncodedString(u''.join(self.chars))
# Return a 2-tuple whose first slot is always None and whose second slot
# is the result of getstring(). This builder produces no bytes form, so
# only the second (unicode) slot is filled.
def getstrings(self):
return (None, self.getstring())
class BytesLiteralBuilder(object): class BytesLiteralBuilder(object):
"""Assemble a byte string or char value. """Assemble a byte string or char value.
...@@ -64,6 +70,9 @@ class BytesLiteralBuilder(object): ...@@ -64,6 +70,9 @@ class BytesLiteralBuilder(object):
def append_charval(self, char_number): def append_charval(self, char_number):
self.chars.append( unichr(char_number).encode('ISO-8859-1') ) self.chars.append( unichr(char_number).encode('ISO-8859-1') )
# Handle an escape sequence for the bytes builder: the escape text is
# appended as given through append(); char_number is accepted but not
# used by this implementation.
def append_uescape(self, char_number, escape_string):
self.append(escape_string)
def getstring(self): def getstring(self):
# this *must* return a byte string! # this *must* return a byte string!
s = BytesLiteral(join_bytes(self.chars)) s = BytesLiteral(join_bytes(self.chars))
...@@ -74,6 +83,32 @@ class BytesLiteralBuilder(object): ...@@ -74,6 +83,32 @@ class BytesLiteralBuilder(object):
# this *must* return a byte string! # this *must* return a byte string!
return self.getstring() return self.getstring()
# Return a 2-tuple whose first slot is the result of getstring() and whose
# second slot is always None. This builder produces no unicode form, so
# only the first (bytes) slot is filled.
def getstrings(self):
return (self.getstring(), None)
class StrLiteralBuilder(object):
    """Build the bytes form and the unicode form of one literal in parallel.

    Every append is forwarded to an internal BytesLiteralBuilder and an
    internal UnicodeLiteralBuilder, so that both representations can be
    fetched together from getstrings().
    """

    def __init__(self, target_encoding):
        # target_encoding is handed to the bytes builder only; the unicode
        # builder is created without arguments.
        self._bytes = BytesLiteralBuilder(target_encoding)
        self._unicode = UnicodeLiteralBuilder()

    def append(self, characters):
        """Add *characters* to both representations (bytes side first)."""
        for builder in (self._bytes, self._unicode):
            builder.append(characters)

    def append_charval(self, char_number):
        """Add the character with code *char_number* to both representations."""
        for builder in (self._bytes, self._unicode):
            builder.append_charval(char_number)

    def append_uescape(self, char_number, escape_string):
        """Record an escape sequence differently in each representation.

        The bytes side receives the escape text *escape_string* as given,
        while the unicode side receives the character *char_number*.
        """
        self._bytes.append(escape_string)
        self._unicode.append_charval(char_number)

    def getstrings(self):
        """Return a ``(bytes_string, unicode_string)`` pair."""
        bytes_string = self._bytes.getstring()
        unicode_string = self._unicode.getstring()
        return (bytes_string, unicode_string)
class EncodedString(_unicode): class EncodedString(_unicode):
# unicode string subclass to keep track of the original encoding. # unicode string subclass to keep track of the original encoding.
# 'encoding' is None for unicode strings and the source encoding # 'encoding' is None for unicode strings and the source encoding
......
...@@ -7,6 +7,8 @@ if sys.version_info[0] >= 3: ...@@ -7,6 +7,8 @@ if sys.version_info[0] >= 3:
True True
>>> isinstance(u, str) >>> isinstance(u, str)
True True
>>> isinstance(b, bytes)
True
""" """
else: else:
__doc__ = u""" __doc__ = u"""
...@@ -14,6 +16,11 @@ else: ...@@ -14,6 +16,11 @@ else:
True True
>>> isinstance(u, unicode) >>> isinstance(u, unicode)
True True
>>> isinstance(b, bytes)
True
""" """
u = "test" u = "test"
cdef char* s = "bytes test"
b = s
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment