Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
0b5e61dd
Commit
0b5e61dd
authored
Oct 04, 2017
by
Serhiy Storchaka
Committed by
GitHub
Oct 04, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-30397: Add re.Pattern and re.Match. (#1646)
parent
8d5a3aad
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
120 additions
and
107 deletions
+120
-107
Doc/howto/regex.rst
Doc/howto/regex.rst
+8
-8
Doc/library/fnmatch.rst
Doc/library/fnmatch.rst
+1
-1
Doc/library/re.rst
Doc/library/re.rst
+52
-52
Lib/idlelib/idle_test/test_calltips.py
Lib/idlelib/idle_test/test_calltips.py
+1
-1
Lib/re.py
Lib/re.py
+18
-17
Lib/sre_constants.py
Lib/sre_constants.py
+2
-0
Lib/telnetlib.py
Lib/telnetlib.py
+2
-2
Lib/test/test_optparse.py
Lib/test/test_optparse.py
+1
-3
Lib/test/test_re.py
Lib/test/test_re.py
+2
-2
Lib/unittest/case.py
Lib/unittest/case.py
+2
-2
Misc/NEWS.d/next/Library/2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst
...S.d/next/Library/2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst
+3
-0
Modules/_sre.c
Modules/_sre.c
+25
-16
Modules/clinic/_sre.c.h
Modules/clinic/_sre.c.h
+3
-3
No files found.
Doc/howto/regex.rst
View file @
0b5e61dd
...
...
@@ -402,7 +402,7 @@ should store the result in a variable for later use. ::
>>> m = p.match('tempo')
>>> m #doctest: +ELLIPSIS
<
_sre
.
SRE_
Match
object
;
span=
(0,
5
),
match=
'tempo'
>
<
re
.
Match
object
;
span=
(0,
5
),
match=
'tempo'
>
Now you can query the :ref:`match object
<match-objects>
` for information
about the matching string. :ref:`match object
<match-objects>
` instances
...
...
@@ -441,7 +441,7 @@ case. ::
>>> print(p.match('::: message'))
None
>>> m = p.search('::: message'); print(m) #doctest: +ELLIPSIS
<
_sre
.
SRE_
Match
object
;
span=
(4,
11
),
match=
'message'
>
<
re
.
Match
object
;
span=
(4,
11
),
match=
'message'
>
>>> m.group()
'message'
>>> m.span()
...
...
@@ -493,7 +493,7 @@ the RE string added as the first argument, and still return either ``None`` or a
>>> print(re.match(r'From\s+', 'Fromage amk'))
None
>>> re.match(r'From\s+', 'From amk Thu May 14 19:12:10 1998') #doctest: +ELLIPSIS
<
_sre
.
SRE_
Match
object
;
span=
(0,
5
),
match=
'From '
>
<
re
.
Match
object
;
span=
(0,
5
),
match=
'From '
>
Under the hood, these functions simply create a pattern object for you
and call the appropriate method on it. They also store the compiled
...
...
@@ -685,7 +685,7 @@ given location, they can obviously be matched an infinite number of times.
line, the RE to use is ``^From``. ::
>>> print(re.search('^From', 'From Here to Eternity')) #doctest: +ELLIPSIS
<
_sre
.
SRE_
Match
object
;
span=
(0,
4
),
match=
'From'
>
<
re
.
Match
object
;
span=
(0,
4
),
match=
'From'
>
>>> print(re.search('^From', 'Reciting From Memory'))
None
...
...
@@ -697,11 +697,11 @@ given location, they can obviously be matched an infinite number of times.
or any location followed by a newline character. ::
>>> print(re.search('}$', '{block}')) #doctest: +ELLIPSIS
<
_sre
.
SRE_
Match
object
;
span=
(6,
7
),
match=
'}'
>
<
re
.
Match
object
;
span=
(6,
7
),
match=
'}'
>
>>> print(re.search('}$', '{block} '))
None
>>> print(re.search('}$', '{block}\n')) #doctest: +ELLIPSIS
<
_sre
.
SRE_
Match
object
;
span=
(6,
7
),
match=
'}'
>
<
re
.
Match
object
;
span=
(6,
7
),
match=
'}'
>
To match a literal ``'$'``, use ``\$`` or enclose it inside a character class,
as in ``[$]``.
...
...
@@ -726,7 +726,7 @@ given location, they can obviously be matched an infinite number of times.
>>> p = re.compile(r'\bclass\b')
>>> print(p.search('no class at all')) #doctest: +ELLIPSIS
<
_sre
.
SRE_
Match
object
;
span=
(3,
8
),
match=
'class'
>
<
re
.
Match
object
;
span=
(3,
8
),
match=
'class'
>
>>> print(p.search('the declassified algorithm'))
None
>>> print(p.search('one subclass is'))
...
...
@@ -744,7 +744,7 @@ given location, they can obviously be matched an infinite number of times.
>>> print(p.search('no class at all'))
None
>>> print(p.search('\b' + 'class' + '\b')) #doctest: +ELLIPSIS
<
_sre
.
SRE_
Match
object
;
span=
(0,
7
),
match=
'\x08class\x08'
>
<
re
.
Match
object
;
span=
(0,
7
),
match=
'\x08class\x08'
>
Second, inside a character class, where there's no use for this assertion,
``\b`` represents the backspace character, for compatibility with Python's
...
...
Doc/library/fnmatch.rst
View file @
0b5e61dd
...
...
@@ -86,7 +86,7 @@ patterns.
'(?s:.*\\.txt)\\Z'
>>> reobj = re.compile(regex)
>>> reobj.match('foobar.txt')
<
_sre.SRE_
Match object; span=(0, 10), match='foobar.txt'>
<
re.
Match object; span=(0, 10), match='foobar.txt'>
.. seealso::
...
...
Doc/library/re.rst
View file @
0b5e61dd
This diff is collapsed.
Click to expand it.
Lib/idlelib/idle_test/test_calltips.py
View file @
0b5e61dd
...
...
@@ -74,7 +74,7 @@ class Get_signatureTest(unittest.TestCase):
non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable;
if a string, backslash escapes in it are processed. If it is
a callable, it's passed the
m
atch object and must return'''
)
a callable, it's passed the
M
atch object and must return'''
)
gtest
(
p
.
sub
,
'''(repl, string, count=0)
\
n
Return the string obtained by replacing the leftmost non-overlapping occurrences o...'''
)
def
test_signature_wrap
(
self
):
...
...
Lib/re.py
View file @
0b5e61dd
...
...
@@ -92,8 +92,8 @@ This module exports the following functions:
subn Same as sub, but also return the number of substitutions made.
split Split a string by the occurrences of a pattern.
findall Find all occurrences of a pattern in a string.
finditer Return an iterator yielding a
m
atch object for each match.
compile Compile a pattern into a
RegexO
bject.
finditer Return an iterator yielding a
M
atch object for each match.
compile Compile a pattern into a
Pattern o
bject.
purge Clear the regular expression cache.
escape Backslash all non-alphanumerics in a string.
...
...
@@ -139,7 +139,7 @@ except ImportError:
__all__
=
[
"match"
,
"fullmatch"
,
"search"
,
"sub"
,
"subn"
,
"split"
,
"findall"
,
"finditer"
,
"compile"
,
"purge"
,
"template"
,
"escape"
,
"error"
,
"A"
,
"I"
,
"L"
,
"M"
,
"S"
,
"X"
,
"U"
,
"error"
,
"
Pattern"
,
"Match"
,
"
A"
,
"I"
,
"L"
,
"M"
,
"S"
,
"X"
,
"U"
,
"ASCII"
,
"IGNORECASE"
,
"LOCALE"
,
"MULTILINE"
,
"DOTALL"
,
"VERBOSE"
,
"UNICODE"
,
]
...
...
@@ -175,17 +175,17 @@ error = sre_compile.error
def
match
(
pattern
,
string
,
flags
=
0
):
"""Try to apply the pattern at the start of the string, returning
a
m
atch object, or None if no match was found."""
a
M
atch object, or None if no match was found."""
return
_compile
(
pattern
,
flags
).
match
(
string
)
def
fullmatch
(
pattern
,
string
,
flags
=
0
):
"""Try to apply the pattern to all of the string, returning
a
m
atch object, or None if no match was found."""
a
M
atch object, or None if no match was found."""
return
_compile
(
pattern
,
flags
).
fullmatch
(
string
)
def
search
(
pattern
,
string
,
flags
=
0
):
"""Scan through string looking for a match to the pattern, returning
a
m
atch object, or None if no match was found."""
a
M
atch object, or None if no match was found."""
return
_compile
(
pattern
,
flags
).
search
(
string
)
def
sub
(
pattern
,
repl
,
string
,
count
=
0
,
flags
=
0
):
...
...
@@ -193,7 +193,7 @@ def sub(pattern, repl, string, count=0, flags=0):
non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable;
if a string, backslash escapes in it are processed. If it is
a callable, it's passed the
m
atch object and must return
a callable, it's passed the
M
atch object and must return
a replacement string to be used."""
return
_compile
(
pattern
,
flags
).
sub
(
repl
,
string
,
count
)
...
...
@@ -204,7 +204,7 @@ def subn(pattern, repl, string, count=0, flags=0):
string by the replacement repl. number is the number of
substitutions that were made. repl can be either a string or a
callable; if a string, backslash escapes in it are processed.
If it is a callable, it's passed the
m
atch object and must
If it is a callable, it's passed the
M
atch object and must
return a replacement string to be used."""
return
_compile
(
pattern
,
flags
).
subn
(
repl
,
string
,
count
)
...
...
@@ -230,13 +230,13 @@ def findall(pattern, string, flags=0):
def
finditer
(
pattern
,
string
,
flags
=
0
):
"""Return an iterator over all non-overlapping matches in the
string. For each match, the iterator returns a
m
atch object.
string. For each match, the iterator returns a
M
atch object.
Empty matches are included in the result."""
return
_compile
(
pattern
,
flags
).
finditer
(
string
)
def
compile
(
pattern
,
flags
=
0
):
"Compile a regular expression pattern, returning a
p
attern object."
"Compile a regular expression pattern, returning a
P
attern object."
return
_compile
(
pattern
,
flags
)
def
purge
():
...
...
@@ -245,7 +245,7 @@ def purge():
_compile_repl
.
cache_clear
()
def
template
(
pattern
,
flags
=
0
):
"Compile a template pattern, returning a
p
attern object"
"Compile a template pattern, returning a
P
attern object"
return
_compile
(
pattern
,
flags
|
T
)
# SPECIAL_CHARS
...
...
@@ -264,13 +264,14 @@ def escape(pattern):
pattern
=
str
(
pattern
,
'latin1'
)
return
pattern
.
translate
(
_special_chars_map
).
encode
(
'latin1'
)
Pattern
=
type
(
sre_compile
.
compile
(
''
,
0
))
Match
=
type
(
sre_compile
.
compile
(
''
,
0
).
match
(
''
))
# --------------------------------------------------------------------
# internals
_cache
=
OrderedDict
()
_pattern_type
=
type
(
sre_compile
.
compile
(
""
,
0
))
_MAXCACHE
=
512
def
_compile
(
pattern
,
flags
):
# internal: compile pattern
...
...
@@ -278,7 +279,7 @@ def _compile(pattern, flags):
return
_cache
[
type
(
pattern
),
pattern
,
flags
]
except
KeyError
:
pass
if
isinstance
(
pattern
,
_pattern_type
):
if
isinstance
(
pattern
,
Pattern
):
if
flags
:
raise
ValueError
(
"cannot process flags argument with a compiled pattern"
)
...
...
@@ -301,12 +302,12 @@ def _compile_repl(repl, pattern):
return
sre_parse
.
parse_template
(
repl
,
pattern
)
def
_expand
(
pattern
,
match
,
template
):
# internal:
m
atch.expand implementation hook
# internal:
M
atch.expand implementation hook
template
=
sre_parse
.
parse_template
(
template
,
pattern
)
return
sre_parse
.
expand_template
(
template
,
match
)
def
_subx
(
pattern
,
template
):
# internal:
p
attern.sub/subn implementation helper
# internal:
P
attern.sub/subn implementation helper
template
=
_compile_repl
(
template
,
pattern
)
if
not
template
[
0
]
and
len
(
template
[
1
])
==
1
:
# literal replacement
...
...
@@ -322,7 +323,7 @@ import copyreg
def
_pickle
(
p
):
return
_compile
,
(
p
.
pattern
,
p
.
flags
)
copyreg
.
pickle
(
_pattern_type
,
_pickle
,
_compile
)
copyreg
.
pickle
(
Pattern
,
_pickle
,
_compile
)
# --------------------------------------------------------------------
# experimental stuff (see python-dev discussions for details)
...
...
Lib/sre_constants.py
View file @
0b5e61dd
...
...
@@ -32,6 +32,8 @@ class error(Exception):
colno: The column corresponding to pos (may be None)
"""
__module__
=
're'
def
__init__
(
self
,
msg
,
pattern
=
None
,
pos
=
None
):
self
.
msg
=
msg
self
.
pattern
=
pattern
...
...
Lib/telnetlib.py
View file @
0b5e61dd
...
...
@@ -585,12 +585,12 @@ class Telnet:
"""Read until one from a list of a regular expressions matches.
The first argument is a list of regular expressions, either
compiled (re.
RegexObject
instances) or uncompiled (strings).
compiled (re.
Pattern
instances) or uncompiled (strings).
The optional second argument is a timeout, in seconds; default
is no timeout.
Return a tuple of three items: the index in the list of the
first regular expression that matches; the
m
atch object
first regular expression that matches; the
re.M
atch object
returned; and the text read up till and including the match.
If EOF is read and no text was read, raise EOFError.
...
...
Lib/test/test_optparse.py
View file @
0b5e61dd
...
...
@@ -24,8 +24,6 @@ from optparse import make_option, Option, \
from
optparse
import
_match_abbrev
from
optparse
import
_parse_num
retype
=
type
(
re
.
compile
(
''
))
class
InterceptedError
(
Exception
):
def
__init__
(
self
,
error_message
=
None
,
...
...
@@ -107,7 +105,7 @@ Args were %(args)s.""" % locals ())
func
(
*
args
,
**
kwargs
)
except
expected_exception
as
err
:
actual_message
=
str
(
err
)
if
isinstance
(
expected_message
,
re
type
):
if
isinstance
(
expected_message
,
re
.
Pattern
):
self
.
assertTrue
(
expected_message
.
search
(
actual_message
),
"""
\
expected exception message pattern:
...
...
Lib/test/test_re.py
View file @
0b5e61dd
...
...
@@ -1596,9 +1596,9 @@ class ReTests(unittest.TestCase):
def test_compile(self):
# Test return value when given string and pattern as parameter
pattern = re.compile('random pattern')
self.assertIsInstance(pattern, re.
_pattern_type
)
self.assertIsInstance(pattern, re.
Pattern
)
same_pattern = re.compile(pattern)
self.assertIsInstance(same_pattern, re.
_pattern_type
)
self.assertIsInstance(same_pattern, re.
Pattern
)
self.assertIs(same_pattern, pattern)
# Test behaviour when not given a string or pattern as parameter
self.assertRaises(TypeError, re.compile, 0)
...
...
Lib/unittest/case.py
View file @
0b5e61dd
...
...
@@ -1273,7 +1273,7 @@ class TestCase(object):
Args:
expected_exception: Exception class expected to be raised.
expected_regex: Regex (re
p
attern object or string) expected
expected_regex: Regex (re
.P
attern object or string) expected
to be found in error message.
args: Function to be called and extra positional args.
kwargs: Extra kwargs.
...
...
@@ -1292,7 +1292,7 @@ class TestCase(object):
Args:
expected_warning: Warning class expected to be triggered.
expected_regex: Regex (re
p
attern object or string) expected
expected_regex: Regex (re
.P
attern object or string) expected
to be found in error message.
args: Function to be called and extra positional args.
kwargs: Extra kwargs.
...
...
Misc/NEWS.d/next/Library/2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst
0 → 100644
View file @
0b5e61dd
The types of compiled regular expression objects and match objects are now exposed as
`re.Pattern` and `re.Match`. This adds information in pydoc output for the
re module.
Modules/_sre.c
View file @
0b5e61dd
...
...
@@ -630,13 +630,13 @@ _sre.SRE_Pattern.fullmatch
pos: Py_ssize_t = 0
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Matches against all of the string
Matches against all of the string
.
[clinic start generated code]*/
static
PyObject
*
_sre_SRE_Pattern_fullmatch_impl
(
PatternObject
*
self
,
PyObject
*
string
,
Py_ssize_t
pos
,
Py_ssize_t
endpos
)
/*[clinic end generated code: output=5833c47782a35f4a input=
a6f640614aaefceb
]*/
/*[clinic end generated code: output=5833c47782a35f4a input=
d9fb03a7625b5828
]*/
{
SRE_STATE
state
;
Py_ssize_t
status
;
...
...
@@ -1341,7 +1341,7 @@ done:
return
result
;
}
PyDoc_STRVAR
(
pattern_doc
,
"Compiled regular expression object
s
"
);
PyDoc_STRVAR
(
pattern_doc
,
"Compiled regular expression object
.
"
);
/* PatternObject's 'groupindex' method. */
static
PyObject
*
...
...
@@ -2221,12 +2221,12 @@ _sre.SRE_Match.span
group: object(c_default="NULL") = 0
/
For
MatchO
bject m, return the 2-tuple (m.start(group), m.end(group)).
For
match o
bject m, return the 2-tuple (m.start(group), m.end(group)).
[clinic start generated code]*/
static
PyObject
*
_sre_SRE_Match_span_impl
(
MatchObject
*
self
,
PyObject
*
group
)
/*[clinic end generated code: output=f02ae40594d14fe6 input=
49092b6008d176d3
]*/
/*[clinic end generated code: output=f02ae40594d14fe6 input=
8fa6014e982d71d4
]*/
{
Py_ssize_t
index
=
match_getindex
(
self
,
group
);
...
...
@@ -2625,15 +2625,18 @@ static PyGetSetDef pattern_getset[] = {
#define PAT_OFF(x) offsetof(PatternObject, x)
static
PyMemberDef
pattern_members
[]
=
{
{
"pattern"
,
T_OBJECT
,
PAT_OFF
(
pattern
),
READONLY
},
{
"flags"
,
T_INT
,
PAT_OFF
(
flags
),
READONLY
},
{
"groups"
,
T_PYSSIZET
,
PAT_OFF
(
groups
),
READONLY
},
{
"pattern"
,
T_OBJECT
,
PAT_OFF
(
pattern
),
READONLY
,
"The pattern string from which the RE object was compiled."
},
{
"flags"
,
T_INT
,
PAT_OFF
(
flags
),
READONLY
,
"The regex matching flags."
},
{
"groups"
,
T_PYSSIZET
,
PAT_OFF
(
groups
),
READONLY
,
"The number of capturing groups in the pattern."
},
{
NULL
}
/* Sentinel */
};
static
PyTypeObject
Pattern_Type
=
{
PyVarObject_HEAD_INIT
(
NULL
,
0
)
"
_"
SRE_MODULE
".SRE_
Pattern"
,
"
re.
Pattern"
,
sizeof
(
PatternObject
),
sizeof
(
SRE_CODE
),
(
destructor
)
pattern_dealloc
,
/* tp_dealloc */
0
,
/* tp_print */
...
...
@@ -2685,18 +2688,24 @@ static PyMethodDef match_methods[] = {
};
static
PyGetSetDef
match_getset
[]
=
{
{
"lastindex"
,
(
getter
)
match_lastindex_get
,
(
setter
)
NULL
},
{
"lastgroup"
,
(
getter
)
match_lastgroup_get
,
(
setter
)
NULL
},
{
"lastindex"
,
(
getter
)
match_lastindex_get
,
(
setter
)
NULL
,
"The integer index of the last matched capturing group."
},
{
"lastgroup"
,
(
getter
)
match_lastgroup_get
,
(
setter
)
NULL
,
"The name of the last matched capturing group."
},
{
"regs"
,
(
getter
)
match_regs_get
,
(
setter
)
NULL
},
{
NULL
}
};
#define MATCH_OFF(x) offsetof(MatchObject, x)
static
PyMemberDef
match_members
[]
=
{
{
"string"
,
T_OBJECT
,
MATCH_OFF
(
string
),
READONLY
},
{
"re"
,
T_OBJECT
,
MATCH_OFF
(
pattern
),
READONLY
},
{
"pos"
,
T_PYSSIZET
,
MATCH_OFF
(
pos
),
READONLY
},
{
"endpos"
,
T_PYSSIZET
,
MATCH_OFF
(
endpos
),
READONLY
},
{
"string"
,
T_OBJECT
,
MATCH_OFF
(
string
),
READONLY
,
"The string passed to match() or search()."
},
{
"re"
,
T_OBJECT
,
MATCH_OFF
(
pattern
),
READONLY
,
"The regular expression object."
},
{
"pos"
,
T_PYSSIZET
,
MATCH_OFF
(
pos
),
READONLY
,
"The index into the string at which the RE engine started looking for a match."
},
{
"endpos"
,
T_PYSSIZET
,
MATCH_OFF
(
endpos
),
READONLY
,
"The index into the string beyond which the RE engine will not go."
},
{
NULL
}
};
...
...
@@ -2705,7 +2714,7 @@ static PyMemberDef match_members[] = {
static
PyTypeObject
Match_Type
=
{
PyVarObject_HEAD_INIT
(
NULL
,
0
)
"
_"
SRE_MODULE
".SRE_
Match"
,
"
re.
Match"
,
sizeof
(
MatchObject
),
sizeof
(
Py_ssize_t
),
(
destructor
)
match_dealloc
,
/* tp_dealloc */
0
,
/* tp_print */
...
...
Modules/clinic/_sre.c.h
View file @
0b5e61dd
...
...
@@ -190,7 +190,7 @@ PyDoc_STRVAR(_sre_SRE_Pattern_fullmatch__doc__,
"fullmatch($self, /, string, pos=0, endpos=sys.maxsize)
\n
"
"--
\n
"
"
\n
"
"Matches against all of the string"
);
"Matches against all of the string
.
"
);
#define _SRE_SRE_PATTERN_FULLMATCH_METHODDEF \
{"fullmatch", (PyCFunction)_sre_SRE_Pattern_fullmatch, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_fullmatch__doc__},
...
...
@@ -682,7 +682,7 @@ PyDoc_STRVAR(_sre_SRE_Match_span__doc__,
"span($self, group=0, /)
\n
"
"--
\n
"
"
\n
"
"For
MatchO
bject m, return the 2-tuple (m.start(group), m.end(group))."
);
"For
match o
bject m, return the 2-tuple (m.start(group), m.end(group))."
);
#define _SRE_SRE_MATCH_SPAN_METHODDEF \
{"span", (PyCFunction)_sre_SRE_Match_span, METH_FASTCALL, _sre_SRE_Match_span__doc__},
...
...
@@ -765,4 +765,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
{
return
_sre_SRE_Scanner_search_impl
(
self
);
}
/*[clinic end generated code: output=
6e3fb17fef1be436
input=a9049054013a1b77]*/
/*[clinic end generated code: output=
1e6a1be31302df09
input=a9049054013a1b77]*/
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment