Commit 71ec1a4a authored by Stefan Behnel

Resolve unicode escapes in Python 2 raw unicode strings.

Closes #1594.
parent 9746acb3
...@@ -27,6 +27,10 @@ Bugs fixed ...@@ -27,6 +27,10 @@ Bugs fixed
* f-string processing was adapted to match recent changes in PEP 498 and * f-string processing was adapted to match recent changes in PEP 498 and
CPython 3.6. CPython 3.6.
* Unicode escapes in 'ur' raw-unicode strings were not resolved in Py2 code.
Original patch by Aaron Gallagher (Github issue #1594).
0.25.2 (2016-12-08) 0.25.2 (2016-12-08)
=================== ===================
......
...@@ -921,7 +921,8 @@ def p_string_literal(s, kind_override=None): ...@@ -921,7 +921,8 @@ def p_string_literal(s, kind_override=None):
if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr): if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr):
has_non_ascii_literal_characters = True has_non_ascii_literal_characters = True
elif sy == 'ESCAPE': elif sy == 'ESCAPE':
if is_raw: # in Py2, 'ur' raw unicode strings resolve unicode escapes but nothing else
if is_raw and (is_python3_source or kind != 'u' or systr[1] not in u'Uu'):
chars.append(systr) chars.append(systr)
if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr): if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr):
has_non_ascii_literal_characters = True has_non_ascii_literal_characters = True
......
...@@ -9,6 +9,8 @@ if sys.version_info[0] >= 3: ...@@ -9,6 +9,8 @@ if sys.version_info[0] >= 3:
True True
>>> isinstance(b, bytes) >>> isinstance(b, bytes)
True True
>>> raw == 'abc\\\\xf8\\\\t\\u00f8\\U000000f8' # unescaped by Python (required by doctest)
True
""" """
else: else:
__doc__ = u""" __doc__ = u"""
...@@ -18,9 +20,13 @@ else: ...@@ -18,9 +20,13 @@ else:
True True
>>> isinstance(b, str) >>> isinstance(b, str)
True True
>>> raw == u'abc\\\\xf8\\\\t\\u00f8\\U000000f8' # unescaped by Python (required by doctest)
True
""" """
u = "test" u = "test"
cdef char* s = "bytes test" cdef char* s = "bytes test"
b = s b = s
raw = r'abc\xf8\t\u00f8\U000000f8'
...@@ -77,6 +77,8 @@ __doc__ = br""" ...@@ -77,6 +77,8 @@ __doc__ = br"""
True True
>>> k == u'\\N{SNOWMAN}' == u'\\u2603' >>> k == u'\\N{SNOWMAN}' == u'\\u2603'
True True
>>> m == u'abc\\\\xf8\\\\t\\u00f8\\U000000f8' # unescaped by Python (required by doctest)
True
>>> add == u'Søk ik' + u'üÖä' + 'abc' >>> add == u'Søk ik' + u'üÖä' + 'abc'
True True
>>> null == u'\\x00' # unescaped by Python (required by doctest) >>> null == u'\\x00' # unescaped by Python (required by doctest)
...@@ -110,6 +112,7 @@ f = u'\xf8' ...@@ -110,6 +112,7 @@ f = u'\xf8'
g = u'\udc00' # lone trail surrogate g = u'\udc00' # lone trail surrogate
h = u'\ud800' # lone lead surrogate h = u'\ud800' # lone lead surrogate
k = u'\N{SNOWMAN}' k = u'\N{SNOWMAN}'
m = ur'abc\xf8\t\u00f8\U000000f8'
add = u'Søk ik' + u'üÖä' + u'abc' add = u'Søk ik' + u'üÖä' + u'abc'
null = u'\x00' null = u'\x00'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment