Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
6d336a02
Commit
6d336a02
authored
May 09, 2017
by
Serhiy Storchaka
Committed by
GitHub
May 09, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-30285: Optimize case-insensitive matching and searching (#1482)
of regular expressions.
parent
f93234bb
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
215 additions
and
70 deletions
+215
-70
Doc/whatsnew/3.7.rst
Doc/whatsnew/3.7.rst
+4
-0
Lib/sre_compile.py
Lib/sre_compile.py
+102
-69
Lib/test/test_re.py
Lib/test/test_re.py
+9
-0
Misc/NEWS
Misc/NEWS
+3
-0
Modules/_sre.c
Modules/_sre.c
+34
-0
Modules/clinic/_sre.c.h
Modules/clinic/_sre.c.h
+63
-1
No files found.
Doc/whatsnew/3.7.rst
View file @
6d336a02
...
...
@@ -208,6 +208,10 @@ Optimizations
using the :func:`os.scandir` function.
(Contributed by Serhiy Storchaka in :issue:`25996`.)
* Optimized case-insensitive matching and searching of :mod:`regular
expressions <re>`. Searching some patterns can now be up to 20 times faster.
(Contributed by Serhiy Storchaka in :issue:`30285`.)
Build and C API Changes
=======================
...
...
Lib/sre_compile.py
View file @
6d336a02
This diff is collapsed.
Click to expand it.
Lib/test/test_re.py
View file @
6d336a02
...
...
@@ -891,15 +891,24 @@ class ReTests(unittest.TestCase):
lo = ord(c.lower())
self.assertEqual(_sre.ascii_tolower(i), lo)
self.assertEqual(_sre.unicode_tolower(i), lo)
iscased = c in string.ascii_letters
self.assertEqual(_sre.ascii_iscased(i), iscased)
self.assertEqual(_sre.unicode_iscased(i), iscased)
for i in list(range(128, 0x1000)) + [0x10400, 0x10428]:
c = chr(i)
self.assertEqual(_sre.ascii_tolower(i), i)
if i != 0x0130:
self.assertEqual(_sre.unicode_tolower(i), ord(c.lower()))
iscased = c != c.lower() or c != c.upper()
self.assertFalse(_sre.ascii_iscased(i))
self.assertEqual(_sre.unicode_iscased(i),
c != c.lower() or c != c.upper())
self.assertEqual(_sre.ascii_tolower(0x0130), 0x0130)
self.assertEqual(_sre.unicode_tolower(0x0130), ord('i'))
self.assertFalse(_sre.ascii_iscased(0x0130))
self.assertTrue(_sre.unicode_iscased(0x0130))
def test_not_literal(self):
self.assertEqual(re.search(r"
\
s
([
^
a
])
", "
b").group(1), "b")
...
...
Misc/NEWS
View file @
6d336a02
...
...
@@ -320,6 +320,9 @@ Extension Modules
Library
-------
- bpo-30285: Optimized case-insensitive matching and searching of regular
expressions.
- bpo-29990: Fix range checking in GB18030 decoder. Original patch by Ma Lin.
- bpo-29979: rewrite cgi.parse_multipart, reusing the FieldStorage class and
...
...
Modules/_sre.c
View file @
6d336a02
...
...
@@ -273,6 +273,38 @@ _sre_getcodesize_impl(PyObject *module)
return
sizeof
(
SRE_CODE
);
}
/*[clinic input]
_sre.ascii_iscased -> bool
character: int
/
[clinic start generated code]*/
static int
_sre_ascii_iscased_impl(PyObject *module, int character)
/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
{
    /* Report whether `character` is cased, i.e. whether sre_lower() or
       sre_upper() maps it to a different code.  Returns 1 if so, else 0.
       The lines above the brace are Argument Clinic output, so the
       commentary is kept inside the body. */
    const unsigned int code = (unsigned int)character;

    if (sre_lower(code) != code) {
        return 1;
    }
    /* Lower-casing left the code unchanged; upper-casing decides. */
    return sre_upper(code) != code;
}
/*[clinic input]
_sre.unicode_iscased -> bool
character: int
/
[clinic start generated code]*/
static int
_sre_unicode_iscased_impl(PyObject *module, int character)
/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
{
    /* Report whether `character` is cased according to the Unicode-aware
       helpers: returns 1 when sre_lower_unicode() or sre_upper_unicode()
       yields a different code, else 0.  The lines above the brace are
       Argument Clinic output, so the commentary is kept inside the body. */
    const unsigned int code = (unsigned int)character;
    int differs = (sre_lower_unicode(code) != code);

    if (!differs) {
        /* Only consult the upper-case mapping when the lower-case one
           did not already settle the question. */
        differs = (sre_upper_unicode(code) != code);
    }
    return differs;
}
/*[clinic input]
_sre.ascii_tolower -> int
...
...
@@ -2750,6 +2782,8 @@ static PyTypeObject Scanner_Type = {
static
PyMethodDef
_functions
[]
=
{
_SRE_COMPILE_METHODDEF
_SRE_GETCODESIZE_METHODDEF
_SRE_ASCII_ISCASED_METHODDEF
_SRE_UNICODE_ISCASED_METHODDEF
_SRE_ASCII_TOLOWER_METHODDEF
_SRE_UNICODE_TOLOWER_METHODDEF
{
NULL
,
NULL
}
...
...
Modules/clinic/_sre.c.h
View file @
6d336a02
...
...
@@ -29,6 +29,68 @@ exit:
return
return_value
;
}
PyDoc_STRVAR
(
_sre_ascii_iscased__doc__
,
"ascii_iscased($module, character, /)
\n
"
"--
\n
"
"
\n
"
);
#define _SRE_ASCII_ISCASED_METHODDEF \
{"ascii_iscased", (PyCFunction)_sre_ascii_iscased, METH_O, _sre_ascii_iscased__doc__},
static
int
_sre_ascii_iscased_impl
(
PyObject
*
module
,
int
character
);
/* Python-level entry point for _sre.ascii_iscased (registered with METH_O
   by _SRE_ASCII_ISCASED_METHODDEF).

   Converts the single argument `arg` to a C int, forwards it to
   _sre_ascii_iscased_impl(), and wraps the int result in a Python bool.
   Returns NULL when argument parsing fails or when the impl reports -1
   with an exception pending.

   NOTE(review): this looks like Argument Clinic output; if so, the code
   should be regenerated rather than edited by hand. */
static PyObject *
_sre_ascii_iscased(PyObject *module, PyObject *arg)
{
    PyObject *return_value = NULL;  /* stays NULL on every error path */
    int character;
    int _return_value;

    /* "i" parses a Python integer into a C int; the text after ':' is the
       function name used in error messages. */
    if (!PyArg_Parse(arg, "i:ascii_iscased", &character)) {
        goto exit;
    }
    _return_value = _sre_ascii_iscased_impl(module, character);
    /* -1 is only treated as a failure if an exception is actually set. */
    if ((_return_value == -1) && PyErr_Occurred()) {
        goto exit;
    }
    return_value = PyBool_FromLong((long)_return_value);

exit:
    return return_value;
}
PyDoc_STRVAR
(
_sre_unicode_iscased__doc__
,
"unicode_iscased($module, character, /)
\n
"
"--
\n
"
"
\n
"
);
#define _SRE_UNICODE_ISCASED_METHODDEF \
{"unicode_iscased", (PyCFunction)_sre_unicode_iscased, METH_O, _sre_unicode_iscased__doc__},
static
int
_sre_unicode_iscased_impl
(
PyObject
*
module
,
int
character
);
/* Python-level entry point for _sre.unicode_iscased (registered with METH_O
   by _SRE_UNICODE_ISCASED_METHODDEF).

   Converts the single argument `arg` to a C int, forwards it to
   _sre_unicode_iscased_impl(), and wraps the int result in a Python bool.
   Returns NULL when argument parsing fails or when the impl reports -1
   with an exception pending.

   NOTE(review): this looks like Argument Clinic output; if so, the code
   should be regenerated rather than edited by hand. */
static PyObject *
_sre_unicode_iscased(PyObject *module, PyObject *arg)
{
    PyObject *return_value = NULL;  /* stays NULL on every error path */
    int character;
    int _return_value;

    /* "i" parses a Python integer into a C int; the text after ':' is the
       function name used in error messages. */
    if (!PyArg_Parse(arg, "i:unicode_iscased", &character)) {
        goto exit;
    }
    _return_value = _sre_unicode_iscased_impl(module, character);
    /* -1 is only treated as a failure if an exception is actually set. */
    if ((_return_value == -1) && PyErr_Occurred()) {
        goto exit;
    }
    return_value = PyBool_FromLong((long)_return_value);

exit:
    return return_value;
}
PyDoc_STRVAR
(
_sre_ascii_tolower__doc__
,
"ascii_tolower($module, character, /)
\n
"
"--
\n
"
...
...
@@ -715,4 +777,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
{
return
_sre_SRE_Scanner_search_impl
(
self
);
}
/*[clinic end generated code: output=
811e67d7f8f5052e
input=a9049054013a1b77]*/
/*[clinic end generated code: output=
5fe47c49e475cccb
input=a9049054013a1b77]*/
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment