Commit 22a309a4 authored by Serhiy Storchaka's avatar Serhiy Storchaka

Issue #21032: Deprecated the use of re.LOCALE flag with str patterns or

re.ASCII. It never worked.
parent 720b8c9d
...@@ -521,7 +521,11 @@ form. ...@@ -521,7 +521,11 @@ form.
current locale. The use of this flag is discouraged as the locale mechanism current locale. The use of this flag is discouraged as the locale mechanism
is very unreliable, and it only handles one "culture" at a time anyway; is very unreliable, and it only handles one "culture" at a time anyway;
you should use Unicode matching instead, which is the default in Python 3 you should use Unicode matching instead, which is the default in Python 3
for Unicode (str) patterns. for Unicode (str) patterns. This flag makes sense only with bytes patterns.
.. deprecated-removed:: 3.5 3.6
Deprecated the use of :const:`re.LOCALE` with string patterns or
:const:`re.ASCII`.
.. data:: M .. data:: M
......
...@@ -751,6 +751,11 @@ def _parse(source, state): ...@@ -751,6 +751,11 @@ def _parse(source, state):
def fix_flags(src, flags): def fix_flags(src, flags):
# Check and fix flags according to the type of pattern (str or bytes) # Check and fix flags according to the type of pattern (str or bytes)
if isinstance(src, str): if isinstance(src, str):
if flags & SRE_FLAG_LOCALE:
import warnings
warnings.warn("LOCALE flag with a str pattern is deprecated. "
"Will be an error in 3.6",
DeprecationWarning, stacklevel=6)
if not flags & SRE_FLAG_ASCII: if not flags & SRE_FLAG_ASCII:
flags |= SRE_FLAG_UNICODE flags |= SRE_FLAG_UNICODE
elif flags & SRE_FLAG_UNICODE: elif flags & SRE_FLAG_UNICODE:
...@@ -758,6 +763,11 @@ def fix_flags(src, flags): ...@@ -758,6 +763,11 @@ def fix_flags(src, flags):
else: else:
if flags & SRE_FLAG_UNICODE: if flags & SRE_FLAG_UNICODE:
raise ValueError("can't use UNICODE flag with a bytes pattern") raise ValueError("can't use UNICODE flag with a bytes pattern")
if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
import warnings
warnings.warn("ASCII and LOCALE flags are incompatible. "
"Will be an error in 3.6",
DeprecationWarning, stacklevel=6)
return flags return flags
def parse(str, flags=0, pattern=None): def parse(str, flags=0, pattern=None):
......
...@@ -502,10 +502,6 @@ class ReTests(unittest.TestCase): ...@@ -502,10 +502,6 @@ class ReTests(unittest.TestCase):
"abcd abc bcd bx", re.ASCII).group(1), "bx") "abcd abc bcd bx", re.ASCII).group(1), "bx")
self.assertEqual(re.search(r"\B(b.)\B", self.assertEqual(re.search(r"\B(b.)\B",
"abc bcd bc abxd", re.ASCII).group(1), "bx") "abc bcd bc abxd", re.ASCII).group(1), "bx")
self.assertEqual(re.search(r"\b(b.)\b",
"abcd abc bcd bx", re.LOCALE).group(1), "bx")
self.assertEqual(re.search(r"\B(b.)\B",
"abc bcd bc abxd", re.LOCALE).group(1), "bx")
self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc") self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc") self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M)) self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
...@@ -526,8 +522,6 @@ class ReTests(unittest.TestCase): ...@@ -526,8 +522,6 @@ class ReTests(unittest.TestCase):
b"1aa! a").group(0), b"1aa! a") b"1aa! a").group(0), b"1aa! a")
self.assertEqual(re.search(r"\d\D\w\W\s\S", self.assertEqual(re.search(r"\d\D\w\W\s\S",
"1aa! a", re.ASCII).group(0), "1aa! a") "1aa! a", re.ASCII).group(0), "1aa! a")
self.assertEqual(re.search(r"\d\D\w\W\s\S",
"1aa! a", re.LOCALE).group(0), "1aa! a")
self.assertEqual(re.search(br"\d\D\w\W\s\S", self.assertEqual(re.search(br"\d\D\w\W\s\S",
b"1aa! a", re.LOCALE).group(0), b"1aa! a") b"1aa! a", re.LOCALE).group(0), b"1aa! a")
...@@ -693,9 +687,12 @@ class ReTests(unittest.TestCase): ...@@ -693,9 +687,12 @@ class ReTests(unittest.TestCase):
self.assertEqual(_sre.getlower(ord('A'), 0), ord('a')) self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a')) self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a')) self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC") self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
def test_not_literal(self): def test_not_literal(self):
self.assertEqual(re.search("\s([^a])", " b").group(1), "b") self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
...@@ -780,8 +777,10 @@ class ReTests(unittest.TestCase): ...@@ -780,8 +777,10 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.X, re.VERBOSE) self.assertEqual(re.X, re.VERBOSE)
def test_flags(self): def test_flags(self):
for flag in [re.I, re.M, re.X, re.S, re.L]: for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
self.assertTrue(re.compile('^pattern$', flag)) self.assertTrue(re.compile('^pattern$', flag))
for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
self.assertTrue(re.compile(b'^pattern$', flag))
def test_sre_character_literals(self): def test_sre_character_literals(self):
for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]: for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
...@@ -1146,6 +1145,52 @@ class ReTests(unittest.TestCase): ...@@ -1146,6 +1145,52 @@ class ReTests(unittest.TestCase):
self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE) self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
self.assertRaises(ValueError, re.compile, '(?au)\w') self.assertRaises(ValueError, re.compile, '(?au)\w')
# Exercise re.LOCALE: with bytes patterns it should affect case-insensitive
# matching and \w for a non-ASCII letter of the current locale, and the
# unsupported combinations (str pattern + LOCALE, LOCALE + ASCII) should
# emit DeprecationWarning.
def test_locale_flag(self):
import locale
# Encoding of the current LC_CTYPE locale.
# NOTE(review): presumably enc can be None when no locale is set; the
# TypeError handler below looks intended to cover that -- confirm.
_, enc = locale.getlocale(locale.LC_CTYPE)
# Search non-ASCII letter
# Look for a byte in 128..255 that decodes to a character with a distinct
# lowercase form which itself round-trips to exactly one byte in enc.
for i in range(128, 256):
try:
c = bytes([i]).decode(enc)
sletter = c.lower()
# No distinct lowercase form: not useful for the IGNORECASE checks.
if sletter == c: continue
bletter = sletter.encode(enc)
# The lowercase form must fit in a single byte ...
if len(bletter) != 1: continue
# ... and must decode back to the same character.
if bletter.decode(enc) != sletter: continue
# bpat matches the original byte literally; bletter is its lowercase byte.
bpat = re.escape(bytes([i]))
break
except (UnicodeError, TypeError):
pass
else:
# No suitable letter found: bletter=None disables every match assertion
# below, so only compilation of the patterns is exercised.
bletter = None
bpat = b'A'
# Bytes patterns
# LOCALE | IGNORECASE (flag argument or inline (?L)) should let bpat match
# the lowercase byte; IGNORECASE alone should not.
pat = re.compile(bpat, re.LOCALE | re.IGNORECASE)
if bletter:
self.assertTrue(pat.match(bletter))
pat = re.compile(b'(?L)' + bpat, re.IGNORECASE)
if bletter:
self.assertTrue(pat.match(bletter))
pat = re.compile(bpat, re.IGNORECASE)
if bletter:
self.assertIsNone(pat.match(bletter))
# \w should match the non-ASCII letter only when LOCALE is in effect.
pat = re.compile(b'\w', re.LOCALE)
if bletter:
self.assertTrue(pat.match(bletter))
pat = re.compile(b'(?L)\w')
if bletter:
self.assertTrue(pat.match(bletter))
pat = re.compile(b'\w')
if bletter:
self.assertIsNone(pat.match(bletter))
# Incompatibilities
# str pattern with LOCALE, given as a flag argument or inline.
self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
self.assertWarns(DeprecationWarning, re.compile, '(?L)')
# bytes pattern combining LOCALE and ASCII, in every flag/inline mix.
self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')
def test_bug_6509(self): def test_bug_6509(self):
# Replacement strings of both types must parse properly. # Replacement strings of both types must parse properly.
# all strings # all strings
...@@ -1477,6 +1522,10 @@ class PatternReprTests(unittest.TestCase): ...@@ -1477,6 +1522,10 @@ class PatternReprTests(unittest.TestCase):
self.check_flags(b'bytes pattern', re.A, self.check_flags(b'bytes pattern', re.A,
"re.compile(b'bytes pattern', re.ASCII)") "re.compile(b'bytes pattern', re.ASCII)")
# Bytes pattern compiled with re.L, checked against the expected text
# "re.compile(b'bytes pattern', re.LOCALE)".
# NOTE(review): check_flags is defined outside this view; presumably it
# compares repr() of the compiled pattern with the given string -- confirm.
def test_locale(self):
self.check_flags(b'bytes pattern', re.L,
"re.compile(b'bytes pattern', re.LOCALE)")
def test_quotes(self): def test_quotes(self):
self.check('random "double quoted" pattern', self.check('random "double quoted" pattern',
'''re.compile('random "double quoted" pattern')''') '''re.compile('random "double quoted" pattern')''')
...@@ -1590,8 +1639,16 @@ class ExternalTests(unittest.TestCase): ...@@ -1590,8 +1639,16 @@ class ExternalTests(unittest.TestCase):
pass pass
else: else:
with self.subTest('bytes pattern match'): with self.subTest('bytes pattern match'):
bpat = re.compile(bpat) obj = re.compile(bpat)
self.assertTrue(bpat.search(bs)) self.assertTrue(obj.search(bs))
# Try the match with LOCALE enabled, and check that it
# still succeeds.
with self.subTest('locale-sensitive match'):
obj = re.compile(bpat, re.LOCALE)
result = obj.search(bs)
if result is None:
print('=== Fails on locale-sensitive match', t)
# Try the match with the search area limited to the extent # Try the match with the search area limited to the extent
# of the match and see if it still succeeds. \B will # of the match and see if it still succeeds. \B will
...@@ -1609,13 +1666,6 @@ class ExternalTests(unittest.TestCase): ...@@ -1609,13 +1666,6 @@ class ExternalTests(unittest.TestCase):
obj = re.compile(pattern, re.IGNORECASE) obj = re.compile(pattern, re.IGNORECASE)
self.assertTrue(obj.search(s)) self.assertTrue(obj.search(s))
# Try the match with LOCALE enabled, and check that it
# still succeeds.
if '(?u)' not in pattern:
with self.subTest('locale-sensitive match'):
obj = re.compile(pattern, re.LOCALE)
self.assertTrue(obj.search(s))
# Try the match with UNICODE locale enabled, and check # Try the match with UNICODE locale enabled, and check
# that it still succeeds. # that it still succeeds.
with self.subTest('unicode-sensitive match'): with self.subTest('unicode-sensitive match'):
......
...@@ -191,6 +191,9 @@ Core and Builtins ...@@ -191,6 +191,9 @@ Core and Builtins
Library Library
------- -------
- Issue #21032: Deprecated the use of re.LOCALE flag with str patterns or
re.ASCII. It never worked.
- Issue #22902: The "ip" command is now used on Linux to determine MAC address - Issue #22902: The "ip" command is now used on Linux to determine MAC address
in uuid.getnode(). Patch by Bruno Cauet. in uuid.getnode(). Patch by Bruno Cauet.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment