Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
be9a4e5c
Commit
be9a4e5c
authored
Sep 10, 2016
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #433028: Added support of modifier spans in regular expressions.
parent
ee73a657
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
180 additions
and
66 deletions
+180
-66
Doc/library/re.rst
Doc/library/re.rst
+10
-0
Doc/whatsnew/3.6.rst
Doc/whatsnew/3.6.rst
+9
-0
Lib/re.py
Lib/re.py
+1
-1
Lib/sre_compile.py
Lib/sre_compile.py
+38
-31
Lib/sre_parse.py
Lib/sre_parse.py
+84
-30
Lib/test/test_re.py
Lib/test/test_re.py
+36
-4
Misc/NEWS
Misc/NEWS
+2
-0
No files found.
Doc/library/re.rst
View file @
be9a4e5c
...
@@ -237,6 +237,16 @@ The special characters are:
...
@@ -237,6 +237,16 @@ The special characters are:
*cannot* be retrieved after performing a match or referenced later in the
*cannot* be retrieved after performing a match or referenced later in the
pattern.
pattern.
``(?imsx-imsx:...)``
(Zero or more letters from the set ``'i'``, ``'m'``, ``'s'``, ``'x'``,
optionally followed by ``'-'`` followed by one or more letters from the
same set.) The letters set or remove the corresponding flags:
:const:`re.I` (ignore case), :const:`re.M` (multi-line), :const:`re.S`
(dot matches all), and :const:`re.X` (verbose), for the part of the
expression. (The flags are described in :ref:`contents-of-module-re`.)
.. versionadded:: 3.6
``(?P<name>...)``
``(?P<name>...)``
Similar to regular parentheses, but the substring matched by the group is
Similar to regular parentheses, but the substring matched by the group is
accessible via the symbolic group name *name*. Group names must be valid
accessible via the symbolic group name *name*. Group names must be valid
...
...
Doc/whatsnew/3.6.rst
View file @
be9a4e5c
...
@@ -645,6 +645,15 @@ Protocol version 4 already supports this case. (Contributed by Serhiy
...
@@ -645,6 +645,15 @@ Protocol version 4 already supports this case. (Contributed by Serhiy
Storchaka in :issue:`24164`.)
Storchaka in :issue:`24164`.)
re
--
Added support of modifier spans in regular expressions. Examples:
``'(?i:p)ython'`` matches ``'python'`` and ``'Python'``, but not ``'PYTHON'``;
``'(?i)g(?-i:v)r'`` matches ``'GvR'`` and ``'gvr'``, but not ``'GVR'``.
(Contributed by Serhiy Storchaka in :issue:`433028`.)
readline
readline
--------
--------
...
...
Lib/re.py
View file @
be9a4e5c
...
@@ -352,7 +352,7 @@ class Scanner:
...
@@ -352,7 +352,7 @@ class Scanner:
for
phrase
,
action
in
lexicon
:
for
phrase
,
action
in
lexicon
:
gid
=
s
.
opengroup
()
gid
=
s
.
opengroup
()
p
.
append
(
sre_parse
.
SubPattern
(
s
,
[
p
.
append
(
sre_parse
.
SubPattern
(
s
,
[
(
SUBPATTERN
,
(
gid
,
sre_parse
.
parse
(
phrase
,
flags
))),
(
SUBPATTERN
,
(
gid
,
0
,
0
,
sre_parse
.
parse
(
phrase
,
flags
))),
]))
]))
s
.
closegroup
(
gid
,
p
[
-
1
])
s
.
closegroup
(
gid
,
p
[
-
1
])
p
=
sre_parse
.
SubPattern
(
s
,
[(
BRANCH
,
(
None
,
p
))])
p
=
sre_parse
.
SubPattern
(
s
,
[(
BRANCH
,
(
None
,
p
))])
...
...
Lib/sre_compile.py
View file @
be9a4e5c
...
@@ -71,7 +71,8 @@ def _compile(code, pattern, flags):
...
@@ -71,7 +71,8 @@ def _compile(code, pattern, flags):
ASSERT_CODES
=
_ASSERT_CODES
ASSERT_CODES
=
_ASSERT_CODES
if
(
flags
&
SRE_FLAG_IGNORECASE
and
if
(
flags
&
SRE_FLAG_IGNORECASE
and
not
(
flags
&
SRE_FLAG_LOCALE
)
and
not
(
flags
&
SRE_FLAG_LOCALE
)
and
flags
&
SRE_FLAG_UNICODE
):
flags
&
SRE_FLAG_UNICODE
and
not
(
flags
&
SRE_FLAG_ASCII
)):
fixes
=
_ignorecase_fixes
fixes
=
_ignorecase_fixes
else
:
else
:
fixes
=
None
fixes
=
None
...
@@ -137,14 +138,15 @@ def _compile(code, pattern, flags):
...
@@ -137,14 +138,15 @@ def _compile(code, pattern, flags):
else
:
else
:
emit
(
MIN_UNTIL
)
emit
(
MIN_UNTIL
)
elif
op
is
SUBPATTERN
:
elif
op
is
SUBPATTERN
:
if
av
[
0
]:
group
,
add_flags
,
del_flags
,
p
=
av
if
group
:
emit
(
MARK
)
emit
(
MARK
)
emit
((
av
[
0
]
-
1
)
*
2
)
emit
((
group
-
1
)
*
2
)
# _compile_info(code,
av[1],
flags)
# _compile_info(code,
p, (flags | add_flags) & ~del_
flags)
_compile
(
code
,
av
[
1
],
flags
)
_compile
(
code
,
p
,
(
flags
|
add_flags
)
&
~
del_
flags
)
if
av
[
0
]
:
if
group
:
emit
(
MARK
)
emit
(
MARK
)
emit
((
av
[
0
]
-
1
)
*
2
+
1
)
emit
((
group
-
1
)
*
2
+
1
)
elif
op
in
SUCCESS_CODES
:
elif
op
in
SUCCESS_CODES
:
emit
(
op
)
emit
(
op
)
elif
op
in
ASSERT_CODES
:
elif
op
in
ASSERT_CODES
:
...
@@ -172,7 +174,7 @@ def _compile(code, pattern, flags):
...
@@ -172,7 +174,7 @@ def _compile(code, pattern, flags):
av
=
AT_MULTILINE
.
get
(
av
,
av
)
av
=
AT_MULTILINE
.
get
(
av
,
av
)
if
flags
&
SRE_FLAG_LOCALE
:
if
flags
&
SRE_FLAG_LOCALE
:
av
=
AT_LOCALE
.
get
(
av
,
av
)
av
=
AT_LOCALE
.
get
(
av
,
av
)
elif
flags
&
SRE_FLAG_UNICODE
:
elif
(
flags
&
SRE_FLAG_UNICODE
)
and
not
(
flags
&
SRE_FLAG_ASCII
)
:
av
=
AT_UNICODE
.
get
(
av
,
av
)
av
=
AT_UNICODE
.
get
(
av
,
av
)
emit
(
av
)
emit
(
av
)
elif
op
is
BRANCH
:
elif
op
is
BRANCH
:
...
@@ -193,7 +195,7 @@ def _compile(code, pattern, flags):
...
@@ -193,7 +195,7 @@ def _compile(code, pattern, flags):
emit
(
op
)
emit
(
op
)
if
flags
&
SRE_FLAG_LOCALE
:
if
flags
&
SRE_FLAG_LOCALE
:
av
=
CH_LOCALE
[
av
]
av
=
CH_LOCALE
[
av
]
elif
flags
&
SRE_FLAG_UNICODE
:
elif
(
flags
&
SRE_FLAG_UNICODE
)
and
not
(
flags
&
SRE_FLAG_ASCII
)
:
av
=
CH_UNICODE
[
av
]
av
=
CH_UNICODE
[
av
]
emit
(
av
)
emit
(
av
)
elif
op
is
GROUPREF
:
elif
op
is
GROUPREF
:
...
@@ -237,7 +239,7 @@ def _compile_charset(charset, flags, code, fixup=None, fixes=None):
...
@@ -237,7 +239,7 @@ def _compile_charset(charset, flags, code, fixup=None, fixes=None):
elif
op
is
CATEGORY
:
elif
op
is
CATEGORY
:
if
flags
&
SRE_FLAG_LOCALE
:
if
flags
&
SRE_FLAG_LOCALE
:
emit
(
CH_LOCALE
[
av
])
emit
(
CH_LOCALE
[
av
])
elif
flags
&
SRE_FLAG_UNICODE
:
elif
(
flags
&
SRE_FLAG_UNICODE
)
and
not
(
flags
&
SRE_FLAG_ASCII
)
:
emit
(
CH_UNICODE
[
av
])
emit
(
CH_UNICODE
[
av
])
else
:
else
:
emit
(
av
)
emit
(
av
)
...
@@ -414,14 +416,16 @@ def _get_literal_prefix(pattern):
...
@@ -414,14 +416,16 @@ def _get_literal_prefix(pattern):
prefix
=
[]
prefix
=
[]
prefixappend
=
prefix
.
append
prefixappend
=
prefix
.
append
prefix_skip
=
None
prefix_skip
=
None
got_all
=
True
for
op
,
av
in
pattern
.
data
:
for
op
,
av
in
pattern
.
data
:
if
op
is
LITERAL
:
if
op
is
LITERAL
:
prefixappend
(
av
)
prefixappend
(
av
)
elif
op
is
SUBPATTERN
:
elif
op
is
SUBPATTERN
:
prefix1
,
prefix_skip1
,
got_all
=
_get_literal_prefix
(
av
[
1
])
group
,
add_flags
,
del_flags
,
p
=
av
if
add_flags
&
SRE_FLAG_IGNORECASE
:
break
prefix1
,
prefix_skip1
,
got_all
=
_get_literal_prefix
(
p
)
if
prefix_skip
is
None
:
if
prefix_skip
is
None
:
if
av
[
0
]
is
not
None
:
if
group
is
not
None
:
prefix_skip
=
len
(
prefix
)
prefix_skip
=
len
(
prefix
)
elif
prefix_skip1
is
not
None
:
elif
prefix_skip1
is
not
None
:
prefix_skip
=
len
(
prefix
)
+
prefix_skip1
prefix_skip
=
len
(
prefix
)
+
prefix_skip1
...
@@ -429,32 +433,35 @@ def _get_literal_prefix(pattern):
...
@@ -429,32 +433,35 @@ def _get_literal_prefix(pattern):
if
not
got_all
:
if
not
got_all
:
break
break
else
:
else
:
got_all
=
False
break
break
return
prefix
,
prefix_skip
,
got_all
else
:
return
prefix
,
prefix_skip
,
True
return
prefix
,
prefix_skip
,
False
def
_get_charset_prefix
(
pattern
):
def
_get_charset_prefix
(
pattern
):
charset
=
[]
# not used
charset
=
[]
# not used
charsetappend
=
charset
.
append
charsetappend
=
charset
.
append
if
pattern
.
data
:
if
pattern
.
data
:
op
,
av
=
pattern
.
data
[
0
]
op
,
av
=
pattern
.
data
[
0
]
if
op
is
SUBPATTERN
and
av
[
1
]:
if
op
is
SUBPATTERN
:
op
,
av
=
av
[
1
][
0
]
group
,
add_flags
,
del_flags
,
p
=
av
if
op
is
LITERAL
:
if
p
and
not
(
add_flags
&
SRE_FLAG_IGNORECASE
):
charsetappend
((
op
,
av
))
op
,
av
=
p
[
0
]
elif
op
is
BRANCH
:
if
op
is
LITERAL
:
c
=
[]
charsetappend
((
op
,
av
))
cappend
=
c
.
append
elif
op
is
BRANCH
:
for
p
in
av
[
1
]:
c
=
[]
if
not
p
:
cappend
=
c
.
append
break
for
p
in
av
[
1
]:
op
,
av
=
p
[
0
]
if
not
p
:
if
op
is
LITERAL
:
break
cappend
((
op
,
av
))
op
,
av
=
p
[
0
]
if
op
is
LITERAL
:
cappend
((
op
,
av
))
else
:
break
else
:
else
:
break
charset
=
c
else
:
charset
=
c
elif
op
is
BRANCH
:
elif
op
is
BRANCH
:
c
=
[]
c
=
[]
cappend
=
c
.
append
cappend
=
c
.
append
...
...
Lib/sre_parse.py
View file @
be9a4e5c
...
@@ -65,6 +65,12 @@ FLAGS = {
...
@@ -65,6 +65,12 @@ FLAGS = {
"u": SRE_FLAG_UNICODE,
"u": SRE_FLAG_UNICODE,
}
}
GLOBAL_FLAGS = (SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE |
SRE_FLAG_DEBUG | SRE_FLAG_TEMPLATE)
class Verbose(Exception):
    """Signal that a global inline flag switched on VERBOSE mid-pattern.

    Raised by _parse_flags() when a ``(?x)``-style group adds
    SRE_FLAG_VERBOSE while state.flags does not yet contain it; parse()
    catches it and parses the whole pattern again with VERBOSE set.
    """
    pass
class Pattern:
class Pattern:
# master pattern object. keeps track of global attributes
# master pattern object. keeps track of global attributes
def __init__(self):
def __init__(self):
...
@@ -184,7 +190,7 @@ class SubPattern:
...
@@ -184,7 +190,7 @@ class SubPattern:
lo = lo + i
lo = lo + i
hi = hi + j
hi = hi + j
elif op is SUBPATTERN:
elif op is SUBPATTERN:
i, j = av[1].getwidth()
i, j = av[
-
1].getwidth()
lo = lo + i
lo = lo + i
hi = hi + j
hi = hi + j
elif op in _REPEATCODES:
elif op in _REPEATCODES:
...
@@ -395,7 +401,7 @@ def _escape(source, escape, state):
...
@@ -395,7 +401,7 @@ def _escape(source, escape, state):
pass
pass
raise source.error("
bad
escape
%
s
" % escape, len(escape))
raise source.error("
bad
escape
%
s
" % escape, len(escape))
def _parse_sub(source, state, nested=True):
def _parse_sub(source, state,
verbose,
nested=True):
# parse an alternation: a|b|c
# parse an alternation: a|b|c
items = []
items = []
...
@@ -403,7 +409,7 @@ def _parse_sub(source, state, nested=True):
...
@@ -403,7 +409,7 @@ def _parse_sub(source, state, nested=True):
sourcematch = source.match
sourcematch = source.match
start = source.tell()
start = source.tell()
while True:
while True:
itemsappend(_parse(source, state))
itemsappend(_parse(source, state
, verbose
))
if not sourcematch("
|
"):
if not sourcematch("
|
"):
break
break
...
@@ -445,10 +451,10 @@ def _parse_sub(source, state, nested=True):
...
@@ -445,10 +451,10 @@ def _parse_sub(source, state, nested=True):
subpattern.append((BRANCH, (None, items)))
subpattern.append((BRANCH, (None, items)))
return subpattern
return subpattern
def _parse_sub_cond(source, state, condgroup):
def _parse_sub_cond(source, state, condgroup
, verbose
):
item_yes = _parse(source, state)
item_yes = _parse(source, state
, verbose
)
if source.match("
|
"):
if source.match("
|
"):
item_no = _parse(source, state)
item_no = _parse(source, state
, verbose
)
if source.next == "
|
":
if source.next == "
|
":
raise source.error("
conditional
backref
with
more
than
two
branches
")
raise source.error("
conditional
backref
with
more
than
two
branches
")
else:
else:
...
@@ -457,7 +463,7 @@ def _parse_sub_cond(source, state, condgroup):
...
@@ -457,7 +463,7 @@ def _parse_sub_cond(source, state, condgroup):
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
return subpattern
return subpattern
def _parse(source, state):
def _parse(source, state
, verbose
):
# parse a simple pattern
# parse a simple pattern
subpattern = SubPattern(state)
subpattern = SubPattern(state)
...
@@ -467,7 +473,6 @@ def _parse(source, state):
...
@@ -467,7 +473,6 @@ def _parse(source, state):
sourcematch = source.match
sourcematch = source.match
_len = len
_len = len
_ord = ord
_ord = ord
verbose = state.flags & SRE_FLAG_VERBOSE
while True:
while True:
...
@@ -621,6 +626,8 @@ def _parse(source, state):
...
@@ -621,6 +626,8 @@ def _parse(source, state):
group
=
True
group
=
True
name
=
None
name
=
None
condgroup
=
None
condgroup
=
None
add_flags
=
0
del_flags
=
0
if
sourcematch
(
"?"
):
if
sourcematch
(
"?"
):
# options
# options
char
=
sourceget
()
char
=
sourceget
()
...
@@ -682,7 +689,7 @@ def _parse(source, state):
...
@@ -682,7 +689,7 @@ def _parse(source, state):
lookbehindgroups
=
state
.
lookbehindgroups
lookbehindgroups
=
state
.
lookbehindgroups
if
lookbehindgroups
is
None
:
if
lookbehindgroups
is
None
:
state
.
lookbehindgroups
=
state
.
groups
state
.
lookbehindgroups
=
state
.
groups
p
=
_parse_sub
(
source
,
state
)
p
=
_parse_sub
(
source
,
state
,
verbose
)
if
dir
<
0
:
if
dir
<
0
:
if
lookbehindgroups
is
None
:
if
lookbehindgroups
is
None
:
state
.
lookbehindgroups
=
None
state
.
lookbehindgroups
=
None
...
@@ -718,19 +725,13 @@ def _parse(source, state):
...
@@ -718,19 +725,13 @@ def _parse(source, state):
raise
source
.
error
(
"invalid group reference"
,
raise
source
.
error
(
"invalid group reference"
,
len
(
condname
)
+
1
)
len
(
condname
)
+
1
)
state
.
checklookbehindgroup
(
condgroup
,
source
)
state
.
checklookbehindgroup
(
condgroup
,
source
)
elif
char
in
FLAGS
:
elif
char
in
FLAGS
or
char
==
"-"
:
# flags
# flags
while
True
:
flags
=
_parse_flags
(
source
,
state
,
char
)
state
.
flags
|=
FLAGS
[
char
]
if
flags
is
None
:
# global flags
char
=
sourceget
()
continue
if
char
is
None
:
add_flags
,
del_flags
=
flags
raise
source
.
error
(
"missing )"
)
group
=
None
if
char
==
")"
:
break
if
char
not
in
FLAGS
:
raise
source
.
error
(
"unknown flag"
,
len
(
char
))
verbose
=
state
.
flags
&
SRE_FLAG_VERBOSE
continue
else
:
else
:
raise
source
.
error
(
"unknown extension ?"
+
char
,
raise
source
.
error
(
"unknown extension ?"
+
char
,
len
(
char
)
+
1
)
len
(
char
)
+
1
)
...
@@ -742,15 +743,17 @@ def _parse(source, state):
...
@@ -742,15 +743,17 @@ def _parse(source, state):
except
error
as
err
:
except
error
as
err
:
raise
source
.
error
(
err
.
msg
,
len
(
name
)
+
1
)
from
None
raise
source
.
error
(
err
.
msg
,
len
(
name
)
+
1
)
from
None
if
condgroup
:
if
condgroup
:
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
)
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
,
verbose
)
else
:
else
:
p
=
_parse_sub
(
source
,
state
)
sub_verbose
=
((
verbose
or
(
add_flags
&
SRE_FLAG_VERBOSE
))
and
not
(
del_flags
&
SRE_FLAG_VERBOSE
))
p
=
_parse_sub
(
source
,
state
,
sub_verbose
)
if
not
source
.
match
(
")"
):
if
not
source
.
match
(
")"
):
raise
source
.
error
(
"missing ), unterminated subpattern"
,
raise
source
.
error
(
"missing ), unterminated subpattern"
,
source
.
tell
()
-
start
)
source
.
tell
()
-
start
)
if
group
is
not
None
:
if
group
is
not
None
:
state
.
closegroup
(
group
,
p
)
state
.
closegroup
(
group
,
p
)
subpatternappend
((
SUBPATTERN
,
(
group
,
p
)))
subpatternappend
((
SUBPATTERN
,
(
group
,
add_flags
,
del_flags
,
p
)))
elif
this
==
"^"
:
elif
this
==
"^"
:
subpatternappend
((
AT
,
AT_BEGINNING
))
subpatternappend
((
AT
,
AT_BEGINNING
))
...
@@ -763,6 +766,53 @@ def _parse(source, state):
...
@@ -763,6 +766,53 @@ def _parse(source, state):
return
subpattern
return
subpattern
def _parse_flags(source, state, char):
    """Parse the flag letters of an inline-flags group.

    Handles both the global form ``(?flags)`` and the scoped form
    ``(?flags-flags:...)``.  Characters are read one at a time with
    ``source.get()`` up to and including the terminating ``)`` or ``:``.

    Parameters:
        source: tokenizer object; only ``get()`` and ``error()`` are used.
        state: pattern state; ``state.flags`` is read, and updated in place
            for the global form.
        char: the first character of the flag list.  It is used directly as
            a FLAGS key unless it is ``"-"``, so the caller is expected to
            pass either a FLAGS key or ``"-"``.

    Returns:
        None for the global form ``(?flags)`` (the flags have been OR-ed
        into ``state.flags``), or a tuple ``(add_flags, del_flags)`` for the
        scoped form ending in ``:``.

    Raises:
        Verbose: the global form turns on SRE_FLAG_VERBOSE while
            ``state.flags`` does not have it set.
        source.error(...): on an unknown flag letter, a missing terminator,
            a GLOBAL_FLAGS member used in the scoped form, or a flag that is
            both added and removed.
    """
    sourceget = source.get
    add_flags = 0
    del_flags = 0
    if char != "-":
        # Collect the flags to switch on, up to ")", "-" or ":".
        while True:
            add_flags |= FLAGS[char]
            char = sourceget()
            if char is None:
                raise source.error("missing -, : or )")
            if char in ")-:":
                break
            if char not in FLAGS:
                # Letters are reported as unknown flags; anything else is
                # reported as a missing terminator.
                msg = "unknown flag" if char.isalpha() else "missing -, : or )"
                raise source.error(msg, len(char))
    if char == ")":
        # Global form: the flags apply to the whole pattern.
        if ((add_flags & SRE_FLAG_VERBOSE) and
            not (state.flags & SRE_FLAG_VERBOSE)):
            # VERBOSE was not in effect so far; let the caller handle it.
            raise Verbose
        state.flags |= add_flags
        return None
    # From here on only the scoped form "(?...:" is possible.
    if add_flags & GLOBAL_FLAGS:
        raise source.error("bad inline flags: cannot turn on global flag", 1)
    if char == "-":
        # At least one flag letter must follow the "-".
        char = sourceget()
        if char is None:
            raise source.error("missing flag")
        if char not in FLAGS:
            msg = "unknown flag" if char.isalpha() else "missing flag"
            raise source.error(msg, len(char))
        # Collect the flags to switch off; only ":" may end this list.
        while True:
            del_flags |= FLAGS[char]
            char = sourceget()
            if char is None:
                raise source.error("missing :")
            if char == ":":
                break
            if char not in FLAGS:
                msg = "unknown flag" if char.isalpha() else "missing :"
                raise source.error(msg, len(char))
    # Every path that reaches this point stopped on ":".
    assert char == ":"
    if del_flags & GLOBAL_FLAGS:
        raise source.error("bad inline flags: cannot turn off global flag", 1)
    if add_flags & del_flags:
        raise source.error("bad inline flags: flag turned on and off", 1)
    return add_flags, del_flags
def
fix_flags
(
src
,
flags
):
def
fix_flags
(
src
,
flags
):
# Check and fix flags according to the type of pattern (str or bytes)
# Check and fix flags according to the type of pattern (str or bytes)
if
isinstance
(
src
,
str
):
if
isinstance
(
src
,
str
):
...
@@ -789,18 +839,22 @@ def parse(str, flags=0, pattern=None):
...
@@ -789,18 +839,22 @@ def parse(str, flags=0, pattern=None):
pattern
.
flags
=
flags
pattern
.
flags
=
flags
pattern
.
str
=
str
pattern
.
str
=
str
p
=
_parse_sub
(
source
,
pattern
,
0
)
try
:
p
=
_parse_sub
(
source
,
pattern
,
flags
&
SRE_FLAG_VERBOSE
,
False
)
except
Verbose
:
# the VERBOSE flag was switched on inside the pattern. to be
# on the safe side, we'll parse the whole thing again...
pattern
=
Pattern
()
pattern
.
flags
=
flags
|
SRE_FLAG_VERBOSE
pattern
.
str
=
str
p
=
_parse_sub
(
source
,
pattern
,
True
,
False
)
p
.
pattern
.
flags
=
fix_flags
(
str
,
p
.
pattern
.
flags
)
p
.
pattern
.
flags
=
fix_flags
(
str
,
p
.
pattern
.
flags
)
if
source
.
next
is
not
None
:
if
source
.
next
is
not
None
:
assert
source
.
next
==
")"
assert
source
.
next
==
")"
raise
source
.
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
not
(
flags
&
SRE_FLAG_VERBOSE
)
and
p
.
pattern
.
flags
&
SRE_FLAG_VERBOSE
:
# the VERBOSE flag was switched on inside the pattern. to be
# on the safe side, we'll parse the whole thing again...
return
parse
(
str
,
p
.
pattern
.
flags
)
if
flags
&
SRE_FLAG_DEBUG
:
if
flags
&
SRE_FLAG_DEBUG
:
p
.
dump
()
p
.
dump
()
...
...
Lib/test/test_re.py
View file @
be9a4e5c
...
@@ -1376,6 +1376,38 @@ class ReTests(unittest.TestCase):
...
@@ -1376,6 +1376,38 @@ class ReTests(unittest.TestCase):
self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE)
self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE)
self.assertRaises(ValueError, re.compile, b'(?aL)')
self.assertRaises(ValueError, re.compile, b'(?aL)')
def test_scoped_flags(self):
    """Check scoped inline flags ``(?flags-flags:...)`` and their errors."""
    # IGNORECASE switched on or off for a group affects only that group.
    self.assertTrue(re.match(r'(?i:a)b', 'Ab'))
    self.assertIsNone(re.match(r'(?i:a)b', 'aB'))
    self.assertIsNone(re.match(r'(?-i:a)b', 'Ab', re.IGNORECASE))
    self.assertTrue(re.match(r'(?-i:a)b', 'aB', re.IGNORECASE))
    # Nested groups: the innermost setting wins inside the inner group.
    self.assertIsNone(re.match(r'(?i:(?-i:a)b)', 'Ab'))
    self.assertTrue(re.match(r'(?i:(?-i:a)b)', 'aB'))

    # VERBOSE switched on or off for a group: whitespace is ignored only
    # where VERBOSE is in effect.
    self.assertTrue(re.match(r'(?x: a) b', 'a b'))
    self.assertIsNone(re.match(r'(?x: a) b', ' a b'))
    self.assertTrue(re.match(r'(?-x: a) b', ' ab', re.VERBOSE))
    self.assertIsNone(re.match(r'(?-x: a) b', 'ab', re.VERBOSE))

    # Invalid flag combinations in the scoped form.
    # NOTE(review): the last argument is presumably the expected error
    # position -- confirm against checkPatternError.
    self.checkPatternError(r'(?a:\w)',
            'bad inline flags: cannot turn on global flag', 3)
    self.checkPatternError(r'(?a)(?-a:\w)',
            'bad inline flags: cannot turn off global flag', 8)
    self.checkPatternError(r'(?i-i:a)',
            'bad inline flags: flag turned on and off', 5)

    # Malformed flag lists after "-".
    self.checkPatternError(r'(?-', 'missing flag', 3)
    self.checkPatternError(r'(?-+', 'missing flag', 3)
    self.checkPatternError(r'(?-z', 'unknown flag', 3)
    self.checkPatternError(r'(?-i', 'missing :', 4)
    self.checkPatternError(r'(?-i)', 'missing :', 4)
    self.checkPatternError(r'(?-i+', 'missing :', 4)
    self.checkPatternError(r'(?-iz', 'unknown flag', 4)
    # Malformed flag lists without "-".
    self.checkPatternError(r'(?i:', 'missing ), unterminated subpattern', 0)
    self.checkPatternError(r'(?i', 'missing -, : or )', 3)
    self.checkPatternError(r'(?i+', 'missing -, : or )', 3)
    self.checkPatternError(r'(?iz', 'unknown flag', 3)
def test_bug_6509(self):
def test_bug_6509(self):
# Replacement strings of both types must parse properly.
# Replacement strings of both types must parse properly.
# all strings
# all strings
...
@@ -1538,9 +1570,9 @@ class ReTests(unittest.TestCase):
...
@@ -1538,9 +1570,9 @@ class ReTests(unittest.TestCase):
with captured_stdout() as out:
with captured_stdout() as out:
re.compile(pat, re.DEBUG)
re.compile(pat, re.DEBUG)
dump = '''
\
dump = '''
\
SUBPATTERN 1
SUBPATTERN 1
0 0
LITERAL 46
LITERAL 46
SUBPATTERN None
SUBPATTERN None
0 0
BRANCH
BRANCH
IN
IN
LITERAL 99
LITERAL 99
...
@@ -1548,7 +1580,7 @@ SUBPATTERN None
...
@@ -1548,7 +1580,7 @@ SUBPATTERN None
OR
OR
LITERAL 112
LITERAL 112
LITERAL 121
LITERAL 121
SUBPATTERN None
SUBPATTERN None
0 0
GROUPREF_EXISTS 1
GROUPREF_EXISTS 1
AT AT_END
AT AT_END
ELSE
ELSE
...
@@ -1664,7 +1696,7 @@ SUBPATTERN None
...
@@ -1664,7 +1696,7 @@ SUBPATTERN None
self.checkPatternError(r'(?P', 'unexpected end of pattern', 3)
self.checkPatternError(r'(?P', 'unexpected end of pattern', 3)
self.checkPatternError(r'(?z)', 'unknown extension ?z', 1)
self.checkPatternError(r'(?z)', 'unknown extension ?z', 1)
self.checkPatternError(r'(?iz)', 'unknown flag', 3)
self.checkPatternError(r'(?iz)', 'unknown flag', 3)
self.checkPatternError(r'(?i', 'missing )', 3)
self.checkPatternError(r'(?i', 'missing
-, : or
)', 3)
self.checkPatternError(r'(?#abc', 'missing ), unterminated comment', 0)
self.checkPatternError(r'(?#abc', 'missing ), unterminated comment', 0)
self.checkPatternError(r'(?<', 'unexpected end of pattern', 3)
self.checkPatternError(r'(?<', 'unexpected end of pattern', 3)
self.checkPatternError(r'(?<>)', 'unknown extension ?<>', 1)
self.checkPatternError(r'(?<>)', 'unknown extension ?<>', 1)
...
...
Misc/NEWS
View file @
be9a4e5c
...
@@ -120,6 +120,8 @@ Core and Builtins
...
@@ -120,6 +120,8 @@ Core and Builtins
Library
Library
-------
-------
-
Issue
#
433028
:
Added
support
of
modifier
spans
in
regular
expressions
.
-
Issue
#
24594
:
Validates
persist
parameter
when
opening
MSI
database
-
Issue
#
24594
:
Validates
persist
parameter
when
opening
MSI
database
-
Issue
#
28047
:
Fixed
calculation
of
line
length
used
for
the
base64
CTE
-
Issue
#
28047
:
Fixed
calculation
of
line
length
used
for
the
base64
CTE
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment