Commit 297d7c2b authored by Stefan Behnel

Fix a compiler crash when non-ASCII characters appear in unprefixed strings in "3str" parsing mode.

parent 3b3bcbfc
...@@ -958,7 +958,8 @@ def p_string_literal(s, kind_override=None): ...@@ -958,7 +958,8 @@ def p_string_literal(s, kind_override=None):
error(pos, u"invalid character literal: %r" % bytes_value) error(pos, u"invalid character literal: %r" % bytes_value)
else: else:
bytes_value, unicode_value = chars.getstrings() bytes_value, unicode_value = chars.getstrings()
if is_python3_source and has_non_ascii_literal_characters: if (has_non_ascii_literal_characters
and is_python3_source and Future.unicode_literals in s.context.future_directives):
# Python 3 forbids literal non-ASCII characters in byte strings # Python 3 forbids literal non-ASCII characters in byte strings
if kind == 'b': if kind == 'b':
s.error("bytes can only contain ASCII literal characters.", pos=pos) s.error("bytes can only contain ASCII literal characters.", pos=pos)
......
...@@ -21,7 +21,8 @@ True ...@@ -21,7 +21,8 @@ True
""" """
import sys import sys
if sys.version_info[0] >= 3: IS_PY2 = sys.version_info[0] < 3
if not IS_PY2:
__doc__ = __doc__.replace(" u'", " '") __doc__ = __doc__.replace(" u'", " '")
def locals_function(a, b=2): def locals_function(a, b=2):
...@@ -312,6 +313,45 @@ def unicode_literals(): ...@@ -312,6 +313,45 @@ def unicode_literals():
return ustring return ustring
def non_ascii_unprefixed_str():
    u"""
    Return an unprefixed string literal that contains a non-ASCII character.

    >>> s = non_ascii_unprefixed_str()
    >>> isinstance(s, bytes)
    False
    >>> len(s)
    3
    """
    # One literal non-ASCII character followed by a hex escape and a unicode
    # escape; the doctest above expects three characters in total.
    s = 'ø\x20\u0020'
    assert isinstance(s, unicode)
    return s
def non_ascii_raw_str():
    u"""
    Return an unprefixed raw string literal that contains a non-ASCII character.

    >>> s = non_ascii_raw_str()
    >>> isinstance(s, bytes)
    False
    >>> len(s)
    11
    """
    # Raw literal: the escape sequences stay as written, so the doctest above
    # expects 1 + 4 + 6 = 11 characters.
    s = r'ø\x20\u0020'
    assert isinstance(s, unicode)
    return s
def non_ascii_raw_prefixed_unicode():
u"""
>>> s = non_ascii_raw_prefixed_unicode()
>>> isinstance(s, bytes)
False
>>> len(s)
11
"""
s = ru'ø\x20\u0020'
assert isinstance(s, unicode)
return s
def str_type_is_unicode(): def str_type_is_unicode():
""" """
>>> str_type, s = str_type_is_unicode() >>> str_type, s = str_type_is_unicode()
......
...@@ -13,6 +13,10 @@ b = 2 ...@@ -13,6 +13,10 @@ b = 2
x = 'abc' x = 'abc'
""" """
import sys
IS_PY2 = sys.version_info[0] < 3
def locals_function(a, b=2): def locals_function(a, b=2):
x = 'abc' x = 'abc'
return locals() return locals()
...@@ -64,6 +68,65 @@ def no_unicode_literals(): ...@@ -64,6 +68,65 @@ def no_unicode_literals():
return str_string return str_string
def non_ascii_str():
    u"""
    Return an unprefixed 'str' literal that contains a non-ASCII character.

    The first group of examples checks the literal as evaluated by the doctest
    itself; the second group checks the value returned by this function.

    >>> s = 'ø\\x20\\u0020'
    >>> isinstance(s, str)
    True
    >>> print(not IS_PY2 or len(s) == 9 or len(s))  # first is 2-char bytes in Py2, hex escape is resolved
    True
    >>> print(IS_PY2 or len(s) == 3 or len(s))  # 3 unicode characters in Py3
    True

    >>> s = non_ascii_str()
    >>> isinstance(s, str)
    True
    >>> print(not IS_PY2 or len(s) == 9 or len(s))  # first is 2-char bytes in Py2, hex escape is resolved
    True
    >>> print(IS_PY2 or len(s) == 3 or len(s))  # 3 unicode characters in Py3
    True
    """
    # Same literal as the one evaluated in the docstring above.
    s = 'ø\x20\u0020'
    assert isinstance(s, str)
    # 'str' must be the native string type: bytes on Py2, unicode on Py3.
    assert (IS_PY2 and isinstance(s, bytes)) or (not IS_PY2 and isinstance(s, unicode))
    return s
def non_ascii_raw_str():
    u"""
    Return an unprefixed raw 'str' literal that contains a non-ASCII character.

    The first group of examples checks the literal as evaluated by the doctest
    itself; the second group checks the value returned by this function.

    >>> s = r'ø\\x20\\u0020'
    >>> print(not IS_PY2 or len(s) == 12 or len(s))  # Py2 (first character is two bytes)
    True
    >>> print(IS_PY2 or len(s) == 11 or len(s))  # Py3 (unicode string)
    True

    >>> s = non_ascii_raw_str()
    >>> isinstance(s, str)
    True
    >>> print(not IS_PY2 or len(s) == 12 or len(s))  # Py2 (first character is two bytes)
    True
    >>> print(IS_PY2 or len(s) == 11 or len(s))  # Py3 (unicode string)
    True
    """
    # Raw literal: the escape sequences are kept as written.
    s = r'ø\x20\u0020'
    assert isinstance(s, str)
    # 'str' must be the native string type: bytes on Py2, unicode on Py3.
    assert (IS_PY2 and isinstance(s, bytes)) or (not IS_PY2 and isinstance(s, unicode))
    return s
def non_ascii_raw_unicode():
u"""
>>> s = non_ascii_raw_unicode()
>>> isinstance(s, bytes)
False
>>> len(s)
11
"""
s = ru'ø\x20\u0020'
assert isinstance(s, unicode)
return s
def str_type_is_str(): def str_type_is_str():
""" """
>>> str_type, s = str_type_is_str() >>> str_type, s = str_type_is_str()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment