Commit d4c72903 authored by Serhiy Storchaka

Issue #22410: Module-level functions in the re module now cache compiled
locale-dependent regular expressions taking into account the locale.
parent 65566984
...@@ -104,6 +104,7 @@ This module also defines an exception 'error'. ...@@ -104,6 +104,7 @@ This module also defines an exception 'error'.
import sys import sys
import sre_compile import sre_compile
import sre_parse import sre_parse
import _locale
# public symbols # public symbols
__all__ = [ "match", "search", "sub", "subn", "split", "findall", __all__ = [ "match", "search", "sub", "subn", "split", "findall",
...@@ -229,9 +230,12 @@ def _compile(*key): ...@@ -229,9 +230,12 @@ def _compile(*key):
bypass_cache = flags & DEBUG bypass_cache = flags & DEBUG
if not bypass_cache: if not bypass_cache:
cachekey = (type(key[0]),) + key cachekey = (type(key[0]),) + key
p = _cache.get(cachekey) try:
if p is not None: p, loc = _cache[cachekey]
return p if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
return p
except KeyError:
pass
if isinstance(pattern, _pattern_type): if isinstance(pattern, _pattern_type):
if flags: if flags:
raise ValueError('Cannot process flags argument with a compiled pattern') raise ValueError('Cannot process flags argument with a compiled pattern')
...@@ -245,7 +249,11 @@ def _compile(*key): ...@@ -245,7 +249,11 @@ def _compile(*key):
if not bypass_cache: if not bypass_cache:
if len(_cache) >= _MAXCACHE: if len(_cache) >= _MAXCACHE:
_cache.clear() _cache.clear()
_cache[cachekey] = p if p.flags & LOCALE:
loc = _locale.setlocale(_locale.LC_CTYPE)
else:
loc = None
_cache[cachekey] = p, loc
return p return p
def _compile_repl(*key): def _compile_repl(*key):
......
from test.test_support import verbose, run_unittest, import_module from test.test_support import verbose, run_unittest, import_module
from test.test_support import precisionbigmemtest, _2G, cpython_only from test.test_support import precisionbigmemtest, _2G, cpython_only
from test.test_support import captured_stdout, have_unicode, requires_unicode, u from test.test_support import captured_stdout, have_unicode, requires_unicode, u
import locale
import re import re
from re import Scanner from re import Scanner
import sre_constants import sre_constants
...@@ -975,6 +976,42 @@ subpattern None ...@@ -975,6 +976,42 @@ subpattern None
self.assertEqual(re.match("(foo)", "foo").group(1L), "foo") self.assertEqual(re.match("(foo)", "foo").group(1L), "foo")
self.assertRaises(IndexError, re.match("", "").group, sys.maxint + 1) self.assertRaises(IndexError, re.match("", "").group, sys.maxint + 1)
def test_locale_caching(self):
    # Issue #22410: the module-level re functions must not reuse a cached
    # locale-dependent pattern after LC_CTYPE has been changed.

    # Remember the current LC_CTYPE setting and restore it once the test
    # is over, whatever the outcome.
    saved_ctype = locale.setlocale(locale.LC_CTYPE)
    self.addCleanup(locale.setlocale, locale.LC_CTYPE, saved_ctype)

    # Both locales have to be available; skip the test as soon as one of
    # them cannot be selected on this system.
    for name in ('en_US.iso88591', 'en_US.utf8'):
        try:
            locale.setlocale(locale.LC_CTYPE, name)
        except locale.Error:
            self.skipTest('test needs %s locale' % name)

    # Run the two checks in both orders, emptying the re cache before each
    # ordering, so that each locale is exercised both with a fresh cache
    # and with entries left behind by the other locale.
    latin1_check = self.check_en_US_iso88591
    utf8_check = self.check_en_US_utf8
    for ordering in ((latin1_check, utf8_check), (utf8_check, latin1_check)):
        re.purge()
        for check in ordering:
            check()
def check_en_US_iso88591(self):
    """Check case-insensitive LOCALE matching under en_US.iso88591.

    Switches LC_CTYPE to 'en_US.iso88591' and asserts that the bytes
    0xC5 and 0xE5 match each other case-insensitively, both when the
    flags are passed as an argument and when they are written inline.
    The locale is left set on return.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')

    # (pattern, subject) pairs with LOCALE|IGNORECASE passed as flags.
    flag_cases = (
        (b'\xc5\xe5', b'\xc5\xe5'),
        (b'\xc5', b'\xe5'),
        (b'\xe5', b'\xc5'),
    )
    for pattern, subject in flag_cases:
        self.assertTrue(re.match(pattern, subject, re.L|re.I))

    # The same pairs with the flags embedded in the pattern itself.
    inline_cases = (
        (b'(?Li)\xc5\xe5', b'\xc5\xe5'),
        (b'(?Li)\xc5', b'\xe5'),
        (b'(?Li)\xe5', b'\xc5'),
    )
    for pattern, subject in inline_cases:
        self.assertTrue(re.match(pattern, subject))
def check_en_US_utf8(self):
    """Check case-insensitive LOCALE matching under en_US.utf8.

    Switches LC_CTYPE to 'en_US.utf8' and asserts that identical byte
    strings still match, while the bytes 0xC5 and 0xE5 do not match each
    other, both with flags passed as an argument and with inline flags.
    The locale is left set on return.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')

    # (pattern, subject, expected to match) with LOCALE|IGNORECASE flags.
    flag_cases = (
        (b'\xc5\xe5', b'\xc5\xe5', True),
        (b'\xc5', b'\xe5', False),
        (b'\xe5', b'\xc5', False),
    )
    for pattern, subject, should_match in flag_cases:
        found = re.match(pattern, subject, re.L|re.I)
        if should_match:
            self.assertTrue(found)
        else:
            self.assertIsNone(found)

    # The same cases with the flags embedded in the pattern itself.
    inline_cases = (
        (b'(?Li)\xc5\xe5', b'\xc5\xe5', True),
        (b'(?Li)\xc5', b'\xe5', False),
        (b'(?Li)\xe5', b'\xc5', False),
    )
    for pattern, subject, should_match in inline_cases:
        found = re.match(pattern, subject)
        if should_match:
            self.assertTrue(found)
        else:
            self.assertIsNone(found)
def run_re_tests(): def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
......
...@@ -37,6 +37,9 @@ Core and Builtins ...@@ -37,6 +37,9 @@ Core and Builtins
Library Library
------- -------
- Issue #22410: Module-level functions in the re module now cache compiled
  locale-dependent regular expressions taking into account the locale.
- Issue #8876: distutils now falls back to copying files when hard linking - Issue #8876: distutils now falls back to copying files when hard linking
doesn't work. This allows use with special filesystems such as VirtualBox doesn't work. This allows use with special filesystems such as VirtualBox
shared folders. shared folders.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment