Commit 0189a46e authored by Serhiy Storchaka

Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.

parent ebca3a5a
"""Fixer that changes unicode to str, unichr to chr, and u"..." into "...". r"""Fixer for unicode.
* Changes unicode to str and unichr to chr.
* If "...\u..." is not unicode literal change it into "...\\u...".
* Change u"..." into "...".
""" """
import re
from ..pgen2 import token from ..pgen2 import token
from .. import fixer_base from .. import fixer_base
_mapping = {u"unichr" : u"chr", u"unicode" : u"str"} _mapping = {u"unichr" : u"chr", u"unicode" : u"str"}
_literal_re = re.compile(ur"[uU][rR]?[\'\"]")
class FixUnicode(fixer_base.BaseFix): class FixUnicode(fixer_base.BaseFix):
BM_compatible = True BM_compatible = True
PATTERN = "STRING | 'unicode' | 'unichr'" PATTERN = "STRING | 'unicode' | 'unichr'"
def start_tree(self, tree, filename):
super(FixUnicode, self).start_tree(tree, filename)
self.unicode_literals = 'unicode_literals' in tree.future_features
def transform(self, node, results): def transform(self, node, results):
if node.type == token.NAME: if node.type == token.NAME:
new = node.clone() new = node.clone()
new.value = _mapping[node.value] new.value = _mapping[node.value]
return new return new
elif node.type == token.STRING: elif node.type == token.STRING:
if _literal_re.match(node.value): val = node.value
new = node.clone() if (not self.unicode_literals and val[0] in u'rR\'"' and
new.value = new.value[1:] u'\\' in val):
return new val = ur'\\'.join([
v.replace(u'\\u', ur'\\u').replace(u'\\U', ur'\\U')
for v in val.split(ur'\\')
])
if val[0] in u'uU':
val = val[1:]
if val == node.value:
return node
new = node.clone()
new.value = val
return new
...@@ -2824,6 +2824,43 @@ class Test_unicode(FixerTestCase): ...@@ -2824,6 +2824,43 @@ class Test_unicode(FixerTestCase):
a = """R'''x''' """ a = """R'''x''' """
self.check(b, a) self.check(b, a)
def test_native_literal_escape_u(self):
b = """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
a = """'\\\\\\\\u20ac\\\\U0001d121\\\\u20ac'"""
self.check(b, a)
b = """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
a = """r'\\\\\\\\u20ac\\\\U0001d121\\\\u20ac'"""
self.check(b, a)
def test_bytes_literal_escape_u(self):
b = """b'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
a = """b'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
self.check(b, a)
b = """br'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
a = """br'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
self.check(b, a)
def test_unicode_literal_escape_u(self):
b = """u'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
a = """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
self.check(b, a)
b = """ur'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
a = """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
self.check(b, a)
def test_native_unicode_literal_escape_u(self):
f = 'from __future__ import unicode_literals\n'
b = f + """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
a = f + """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
self.check(b, a)
b = f + """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
a = f + """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
self.check(b, a)
class Test_callable(FixerTestCase): class Test_callable(FixerTestCase):
fixer = "callable" fixer = "callable"
......
...@@ -32,6 +32,8 @@ Core and Builtins ...@@ -32,6 +32,8 @@ Core and Builtins
Library Library
------- -------
- Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
- Issue #19137: The pprint module now correctly formats empty set and frozenset - Issue #19137: The pprint module now correctly formats empty set and frozenset
and instances of set and frozenset subclasses. and instances of set and frozenset subclasses.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment