Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
4f859ed9
Commit
4f859ed9
authored
Aug 09, 2010
by
Raymond Hettinger
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue 9396. Apply functools.lru_cache in the place of the
random flushing cache in the re module.
parent
cca65313
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
14 additions
and
84 deletions
+14
-84
Lib/re.py
Lib/re.py
+12
-20
Lib/test/test_re.py
Lib/test/test_re.py
+0
-62
Misc/NEWS
Misc/NEWS
+2
-2
No files found.
Lib/re.py
View file @
4f859ed9
...
...
@@ -118,6 +118,7 @@ This module also defines an exception 'error'.
import
sys
import
sre_compile
import
sre_parse
import
functools
# public symbols
__all__
=
[
"match"
,
"search"
,
"sub"
,
"subn"
,
"split"
,
"findall"
,
...
...
@@ -205,9 +206,9 @@ def compile(pattern, flags=0):
return
_compile
(
pattern
,
flags
)
def
purge
():
"Clear the regular expression cache"
_c
ache
.
clear
()
_c
ach
e_repl
.
clear
()
"Clear the regular expression cache
s
"
_c
ompile_typed
.
clear
()
_c
ompil
e_repl
.
clear
()
def
template
(
pattern
,
flags
=
0
):
"Compile a template pattern, returning a pattern object"
...
...
@@ -289,12 +290,12 @@ def _shrink_cache(cache_dict, max_length, divisor=5):
# Ignore problems if the cache changed from another thread.
pass
def
_compile
(
*
key
):
def
_compile
(
*
args
):
return
_compile_typed
(
type
(
args
[
0
]),
*
args
)
@
functools
.
lru_cache
(
maxsize
=
_MAXCACHE
)
def
_compile_typed
(
type
,
*
key
):
# internal: compile pattern
cachekey
=
(
type
(
key
[
0
]),)
+
key
p
=
_cache
.
get
(
cachekey
)
if
p
is
not
None
:
return
p
pattern
,
flags
=
key
if
isinstance
(
pattern
,
_pattern_type
):
if
flags
:
...
...
@@ -303,23 +304,14 @@ def _compile(*key):
return
pattern
if
not
sre_compile
.
isstring
(
pattern
):
raise
TypeError
(
"first argument must be string or compiled pattern"
)
p
=
sre_compile
.
compile
(
pattern
,
flags
)
if
len
(
_cache
)
>=
_MAXCACHE
:
_shrink_cache
(
_cache
,
_MAXCACHE
)
_cache
[
cachekey
]
=
p
return
sre_compile
.
compile
(
pattern
,
flags
)
return
p
@
functools
.
lru_cache
(
maxsize
=
_MAXCACHE
)
def
_compile_repl
(
*
key
):
# internal: compile replacement pattern
p
=
_cache_repl
.
get
(
key
)
if
p
is
not
None
:
return
p
repl
,
pattern
=
key
p
=
sre_parse
.
parse_template
(
repl
,
pattern
)
if
len
(
_cache_repl
)
>=
_MAXCACHE
:
_shrink_cache
(
_cache_repl
,
_MAXCACHE
)
_cache_repl
[
key
]
=
p
return
p
return
sre_parse
.
parse_template
(
repl
,
pattern
)
def
_expand
(
pattern
,
match
,
template
):
# internal: match.expand implementation hook
...
...
Lib/test/test_re.py
View file @
4f859ed9
...
...
@@ -875,70 +875,8 @@ def run_re_tests():
print('=== Fails on unicode-sensitive match', t)
class ReCacheTests(unittest.TestCase):
"""These tests are specific to the re._shrink_cache implementation."""
def setUp(self):
self._orig_maxcache = re._MAXCACHE
def tearDown(self):
re._MAXCACHE = self._orig_maxcache
def test_compile_cache_overflow(self):
# NOTE: If a profiler or debugger is tracing code and compiling
# regular expressions while tracing through this test... expect
# the test to fail. This test is not concurrency safe.
# Explicitly fill the caches.
re._MAXCACHE = 20
max_cache = re._MAXCACHE
unique_chars = tuple(chr(char_num) for char_num in
range(b'a'[0], b'a'[0]+max_cache))
re._cache.clear()
for char in unique_chars:
re._compile(char, 0)
self.assertEqual(max_cache, len(re._cache))
re._cache_repl.clear()
for char in unique_chars:
re._compile_repl(char*2, char)
self.assertEqual(max_cache, len(re._cache_repl))
# Overflow both caches and make sure they have extra room left
# afterwards as well as having more than a single entry.
re._compile('A', 0)
self.assertLess(len(re._cache), max_cache)
self.assertGreater(len(re._cache), 1)
re._compile_repl('A', 'A')
self.assertLess(len(re._cache_repl), max_cache)
self.assertGreater(len(re._cache_repl), 1)
def test_shrink_cache_at_limit(self):
cache = dict(zip(range(6), range(6)))
re._shrink_cache(cache, 6, divisor=3)
self.assertEqual(4, len(cache))
def test_shrink_cache_empty(self):
cache = {}
re._shrink_cache(cache, 6, divisor=3)
# Cache was empty, make sure we didn't raise an exception.
self.assertEqual(0, len(cache))
def test_shrink_cache_overflowing(self):
cache = dict(zip(range(6), range(6)))
re._shrink_cache(cache, 4, divisor=2)
# Cache was larger than the maximum, be sure we shrunk to smaller.
self.assertEqual(2, len(cache))
def test_shrink_cache_underflow(self):
cache = dict(zip(range(6), range(6)))
# No shrinking to do.
re._shrink_cache(cache, 9, divisor=3)
self.assertEqual(6, len(cache))
def test_main():
run_unittest(ReTests)
run_unittest(ReCacheTests)
run_re_tests()
if __name__ == "
__main__
":
...
...
Misc/NEWS
View file @
4f859ed9
...
...
@@ -697,8 +697,8 @@ Library
- The default size of the re module's compiled regular expression cache has been
increased from 100 to 500 and the cache replacement policy has changed from
simply clearing the entire cache on overflow to randomly forgetting 20% of the
existing cached compiled regular expressions. This is a performance win for
simply clearing the entire cache on overflow to forgetting the least recently
used cached compiled regular expressions. This is a performance win for
applications that use a lot of regular expressions and limits the impact of
the performance hit anytime the cache is exceeded.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment