Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
7186cc29
Commit
7186cc29
authored
May 05, 2017
by
Serhiy Storchaka
Committed by
GitHub
May 05, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-30277: Replace _sre.getlower() with _sre.ascii_tolower() and _sre.unicode_tolower(). (#1468)
parent
76a3e51a
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
83 additions
and
44 deletions
+83
-44
Lib/sre_compile.py
Lib/sre_compile.py
+10
-13
Lib/test/test_re.py
Lib/test/test_re.py
+16
-10
Modules/_sre.c
Modules/_sre.c
+20
-9
Modules/clinic/_sre.c.h
Modules/clinic/_sre.c.h
+37
-12
No files found.
Lib/sre_compile.py
View file @
7186cc29
...
...
@@ -69,13 +69,14 @@ def _compile(code, pattern, flags):
REPEATING_CODES
=
_REPEATING_CODES
SUCCESS_CODES
=
_SUCCESS_CODES
ASSERT_CODES
=
_ASSERT_CODES
if
(
flags
&
SRE_FLAG_IGNORECASE
and
not
(
flags
&
SRE_FLAG_LOCALE
)
and
flags
&
SRE_FLAG_UNICODE
and
not
(
flags
&
SRE_FLAG_ASCII
)):
fixes
=
_ignorecase_fixes
else
:
fixes
=
None
tolower
=
None
fixes
=
None
if
flags
&
SRE_FLAG_IGNORECASE
and
not
flags
&
SRE_FLAG_LOCALE
:
if
flags
&
SRE_FLAG_UNICODE
and
not
flags
&
SRE_FLAG_ASCII
:
tolower
=
_sre
.
unicode_tolower
fixes
=
_ignorecase_fixes
else
:
tolower
=
_sre
.
ascii_tolower
for
op
,
av
in
pattern
:
if
op
in
LITERAL_CODES
:
if
not
flags
&
SRE_FLAG_IGNORECASE
:
...
...
@@ -85,7 +86,7 @@ def _compile(code, pattern, flags):
emit
(
OP_LOC_IGNORE
[
op
])
emit
(
av
)
else
:
lo
=
_sre
.
getlower
(
av
,
flags
)
lo
=
tolower
(
av
)
if
fixes
and
lo
in
fixes
:
emit
(
IN_IGNORE
)
skip
=
_len
(
code
);
emit
(
0
)
...
...
@@ -102,16 +103,12 @@ def _compile(code, pattern, flags):
elif
op
is
IN
:
if
not
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
op
)
fixup
=
None
elif
flags
&
SRE_FLAG_LOCALE
:
emit
(
IN_LOC_IGNORE
)
fixup
=
None
else
:
emit
(
IN_IGNORE
)
def
fixup
(
literal
,
flags
=
flags
):
return
_sre
.
getlower
(
literal
,
flags
)
skip
=
_len
(
code
);
emit
(
0
)
_compile_charset
(
av
,
flags
,
code
,
fixup
,
fixes
)
_compile_charset
(
av
,
flags
,
code
,
tolower
,
fixes
)
code
[
skip
]
=
_len
(
code
)
-
skip
elif
op
is
ANY
:
if
flags
&
SRE_FLAG_DOTALL
:
...
...
Lib/test/test_re.py
View file @
7186cc29
...
...
@@ -883,17 +883,23 @@ class ReTests(unittest.TestCase):
def test_category(self):
self.assertEqual(re.match(r"
(
\
s
)
", "
").group(1), "
")
def test_getlower(self):
@cpython_only
def test_case_helpers(self):
import _sre
self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
self.assertEqual(re.match("
abc
", "
ABC
", re.I).group(0), "
ABC
")
self.assertEqual(re.match(b"
abc
", b"
ABC
", re.I).group(0), b"
ABC
")
self.assertEqual(re.match("
abc
", "
ABC
", re.I|re.A).group(0), "
ABC
")
self.assertEqual(re.match(b"
abc
", b"
ABC
", re.I|re.L).group(0), b"
ABC
")
for i in range(128):
c = chr(i)
lo = ord(c.lower())
self.assertEqual(_sre.ascii_tolower(i), lo)
self.assertEqual(_sre.unicode_tolower(i), lo)
for i in list(range(128, 0x1000)) + [0x10400, 0x10428]:
c = chr(i)
self.assertEqual(_sre.ascii_tolower(i), i)
if i != 0x0130:
self.assertEqual(_sre.unicode_tolower(i), ord(c.lower()))
self.assertEqual(_sre.ascii_tolower(0x0130), 0x0130)
self.assertEqual(_sre.unicode_tolower(0x0130), ord('i'))
def test_not_literal(self):
self.assertEqual(re.search(r"
\
s
([
^
a
])
", "
b").group(1), "b")
...
...
Modules/_sre.c
View file @
7186cc29
...
...
@@ -274,25 +274,35 @@ _sre_getcodesize_impl(PyObject *module)
}
/*[clinic input]
_sre.
get
lower -> int
_sre.
ascii_to
lower -> int
character: int
flags: int
/
[clinic start generated code]*/
static
int
_sre_
getlower_impl
(
PyObject
*
module
,
int
character
,
int
flags
)
/*[clinic end generated code: output=
47eebc4c1214feb5 input=087d2f1c44bbca6f
]*/
_sre_
ascii_tolower_impl
(
PyObject
*
module
,
int
character
)
/*[clinic end generated code: output=
228294ed6ff2a612 input=272c609b5b61f136
]*/
{
if
(
flags
&
SRE_FLAG_LOCALE
)
return
sre_lower_locale
(
character
);
if
(
flags
&
SRE_FLAG_UNICODE
)
return
sre_lower_unicode
(
character
);
return
sre_lower
(
character
);
}
/*[clinic input]
_sre.unicode_tolower -> int
character: int
/
[clinic start generated code]*/
static
int
_sre_unicode_tolower_impl
(
PyObject
*
module
,
int
character
)
/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
{
return
sre_lower_unicode
(
character
);
}
LOCAL
(
void
)
state_reset
(
SRE_STATE
*
state
)
{
...
...
@@ -2740,7 +2750,8 @@ static PyTypeObject Scanner_Type = {
static
PyMethodDef
_functions
[]
=
{
_SRE_COMPILE_METHODDEF
_SRE_GETCODESIZE_METHODDEF
_SRE_GETLOWER_METHODDEF
_SRE_ASCII_TOLOWER_METHODDEF
_SRE_UNICODE_TOLOWER_METHODDEF
{
NULL
,
NULL
}
};
...
...
Modules/clinic/_sre.c.h
View file @
7186cc29
...
...
@@ -29,34 +29,59 @@ exit:
return
return_value
;
}
PyDoc_STRVAR
(
_sre_
get
lower__doc__
,
"
getlower($module, character, flags
, /)
\n
"
PyDoc_STRVAR
(
_sre_
ascii_to
lower__doc__
,
"
ascii_tolower($module, character
, /)
\n
"
"--
\n
"
"
\n
"
);
#define _SRE_
GET
LOWER_METHODDEF \
{"
getlower", (PyCFunction)_sre_getlower, METH_FASTCALL, _sre_get
lower__doc__},
#define _SRE_
ASCII_TO
LOWER_METHODDEF \
{"
ascii_tolower", (PyCFunction)_sre_ascii_tolower, METH_O, _sre_ascii_to
lower__doc__},
static
int
_sre_
getlower_impl
(
PyObject
*
module
,
int
character
,
int
flags
);
_sre_
ascii_tolower_impl
(
PyObject
*
module
,
int
character
);
static
PyObject
*
_sre_
getlower
(
PyObject
*
module
,
PyObject
**
args
,
Py_ssize_t
nargs
,
PyObject
*
kwnames
)
_sre_
ascii_tolower
(
PyObject
*
module
,
PyObject
*
arg
)
{
PyObject
*
return_value
=
NULL
;
int
character
;
int
flags
;
int
_return_value
;
if
(
!
_PyArg_ParseStack
(
args
,
nargs
,
"ii:getlower"
,
&
character
,
&
flags
))
{
if
(
!
PyArg_Parse
(
arg
,
"i:ascii_tolower"
,
&
character
))
{
goto
exit
;
}
_return_value
=
_sre_ascii_tolower_impl
(
module
,
character
);
if
((
_return_value
==
-
1
)
&&
PyErr_Occurred
())
{
goto
exit
;
}
return_value
=
PyLong_FromLong
((
long
)
_return_value
);
exit:
return
return_value
;
}
PyDoc_STRVAR
(
_sre_unicode_tolower__doc__
,
"unicode_tolower($module, character, /)
\n
"
"--
\n
"
"
\n
"
);
#define _SRE_UNICODE_TOLOWER_METHODDEF \
{"unicode_tolower", (PyCFunction)_sre_unicode_tolower, METH_O, _sre_unicode_tolower__doc__},
static
int
_sre_unicode_tolower_impl
(
PyObject
*
module
,
int
character
);
static
PyObject
*
_sre_unicode_tolower
(
PyObject
*
module
,
PyObject
*
arg
)
{
PyObject
*
return_value
=
NULL
;
int
character
;
int
_return_value
;
if
(
!
_PyArg_NoStackKeywords
(
"getlower"
,
kwnames
))
{
if
(
!
PyArg_Parse
(
arg
,
"i:unicode_tolower"
,
&
character
))
{
goto
exit
;
}
_return_value
=
_sre_
getlower_impl
(
module
,
character
,
flags
);
_return_value
=
_sre_
unicode_tolower_impl
(
module
,
character
);
if
((
_return_value
==
-
1
)
&&
PyErr_Occurred
())
{
goto
exit
;
}
...
...
@@ -690,4 +715,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
{
return
_sre_SRE_Scanner_search_impl
(
self
);
}
/*[clinic end generated code: output=
e6dab3ba8864da9
e input=a9049054013a1b77]*/
/*[clinic end generated code: output=
811e67d7f8f5052
e input=a9049054013a1b77]*/
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment