Commit 4659cc07 authored by Serhiy Storchaka

Issue #22410: Module level functions in the re module now cache compiled

locale-dependent regular expressions taking into account the locale.
parent 35903c80
...@@ -122,6 +122,7 @@ This module also defines an exception 'error'. ...@@ -122,6 +122,7 @@ This module also defines an exception 'error'.
import sys import sys
import sre_compile import sre_compile
import sre_parse import sre_parse
import _locale
# public symbols # public symbols
__all__ = [ "match", "fullmatch", "search", "sub", "subn", "split", "findall", __all__ = [ "match", "fullmatch", "search", "sub", "subn", "split", "findall",
...@@ -275,7 +276,9 @@ def _compile(pattern, flags): ...@@ -275,7 +276,9 @@ def _compile(pattern, flags):
bypass_cache = flags & DEBUG bypass_cache = flags & DEBUG
if not bypass_cache: if not bypass_cache:
try: try:
return _cache[type(pattern), pattern, flags] p, loc = _cache[type(pattern), pattern, flags]
if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
return p
except KeyError: except KeyError:
pass pass
if isinstance(pattern, _pattern_type): if isinstance(pattern, _pattern_type):
...@@ -289,7 +292,11 @@ def _compile(pattern, flags): ...@@ -289,7 +292,11 @@ def _compile(pattern, flags):
if not bypass_cache: if not bypass_cache:
if len(_cache) >= _MAXCACHE: if len(_cache) >= _MAXCACHE:
_cache.clear() _cache.clear()
_cache[type(pattern), pattern, flags] = p if p.flags & LOCALE:
loc = _locale.setlocale(_locale.LC_CTYPE)
else:
loc = None
_cache[type(pattern), pattern, flags] = p, loc
return p return p
def _compile_repl(repl, pattern): def _compile_repl(repl, pattern):
......
from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
cpython_only, captured_stdout cpython_only, captured_stdout
import io import io
import locale
import re import re
from re import Scanner from re import Scanner
import sre_compile import sre_compile
...@@ -1254,6 +1255,42 @@ subpattern None ...@@ -1254,6 +1255,42 @@ subpattern None
# with ignore case. # with ignore case.
self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3)) self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
def test_locale_caching(self):
    # Issue #22410: the module-level re functions must not hand back a
    # cached locale-dependent pattern that was compiled under a different
    # LC_CTYPE locale.
    oldlocale = locale.setlocale(locale.LC_CTYPE)
    # Registered before any locale change so the original LC_CTYPE is
    # restored even when the test is skipped or fails.
    self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
    # Probe both locales up front: skip cleanly rather than fail halfway
    # through on systems where one of them is not installed.
    for loc in 'en_US.iso88591', 'en_US.utf8':
        try:
            locale.setlocale(locale.LC_CTYPE, loc)
        except locale.Error:
            # Unsupported locale on this system
            self.skipTest('test needs %s locale' % loc)

    # Run the checks in both orders, each time starting from an empty
    # pattern cache, so that either locale can be the one that populates
    # the cache and the other the one that must not reuse its entries.
    re.purge()
    self.check_en_US_iso88591()
    self.check_en_US_utf8()
    re.purge()
    self.check_en_US_utf8()
    self.check_en_US_iso88591()
def check_en_US_iso88591(self):
    # Helper for test_locale_caching: switch LC_CTYPE to an ISO 8859-1
    # locale and verify locale-aware, case-insensitive bytes matching.
    # In ISO 8859-1 the bytes 0xC5 and 0xE5 are the upper/lower case forms
    # of the same letter, so they must match each other under re.L|re.I.
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    # Flags supplied through the flags argument.
    self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
    self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I))
    self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I))
    # Same checks with the flags embedded in the pattern itself.
    self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
    self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5'))
    self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5'))
def check_en_US_utf8(self):
    # Helper for test_locale_caching: switch LC_CTYPE to a UTF-8 locale and
    # verify locale-aware, case-insensitive bytes matching.  The test
    # expects the single bytes 0xC5 and 0xE5 NOT to be treated as a case
    # pair here, so only the byte-for-byte identical subject may match.
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    # Flags supplied through the flags argument.
    self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
    self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I))
    self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I))
    # Same checks with the flags embedded in the pattern itself.
    self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
    self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
    self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
class PatternReprTests(unittest.TestCase): class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected): def check(self, pattern, expected):
......
...@@ -33,6 +33,9 @@ Core and Builtins ...@@ -33,6 +33,9 @@ Core and Builtins
Library Library
------- -------
- Issue #22410: Module level functions in the re module now cache compiled
locale-dependent regular expressions taking into account the locale.
- Issue #8876: distutils now falls back to copying files when hard linking - Issue #8876: distutils now falls back to copying files when hard linking
doesn't work. This allows use with special filesystems such as VirtualBox doesn't work. This allows use with special filesystems such as VirtualBox
shared folders. shared folders.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment