Commit 0b5e61dd authored by Serhiy Storchaka, committed by GitHub

bpo-30397: Add re.Pattern and re.Match. (#1646)

parent 8d5a3aad
......@@ -402,7 +402,7 @@ should store the result in a variable for later use. ::
>>> m = p.match('tempo')
>>> m #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(0, 5), match='tempo'>
<re.Match object; span=(0, 5), match='tempo'>
Now you can query the :ref:`match object <match-objects>` for information
about the matching string. :ref:`match object <match-objects>` instances
......@@ -441,7 +441,7 @@ case. ::
>>> print(p.match('::: message'))
None
>>> m = p.search('::: message'); print(m) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(4, 11), match='message'>
<re.Match object; span=(4, 11), match='message'>
>>> m.group()
'message'
>>> m.span()
......@@ -493,7 +493,7 @@ the RE string added as the first argument, and still return either ``None`` or a
>>> print(re.match(r'From\s+', 'Fromage amk'))
None
>>> re.match(r'From\s+', 'From amk Thu May 14 19:12:10 1998') #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(0, 5), match='From '>
<re.Match object; span=(0, 5), match='From '>
Under the hood, these functions simply create a pattern object for you
and call the appropriate method on it. They also store the compiled
......@@ -685,7 +685,7 @@ given location, they can obviously be matched an infinite number of times.
line, the RE to use is ``^From``. ::
>>> print(re.search('^From', 'From Here to Eternity')) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(0, 4), match='From'>
<re.Match object; span=(0, 4), match='From'>
>>> print(re.search('^From', 'Reciting From Memory'))
None
......@@ -697,11 +697,11 @@ given location, they can obviously be matched an infinite number of times.
or any location followed by a newline character. ::
>>> print(re.search('}$', '{block}')) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(6, 7), match='}'>
<re.Match object; span=(6, 7), match='}'>
>>> print(re.search('}$', '{block} '))
None
>>> print(re.search('}$', '{block}\n')) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(6, 7), match='}'>
<re.Match object; span=(6, 7), match='}'>
To match a literal ``'$'``, use ``\$`` or enclose it inside a character class,
as in ``[$]``.
......@@ -726,7 +726,7 @@ given location, they can obviously be matched an infinite number of times.
>>> p = re.compile(r'\bclass\b')
>>> print(p.search('no class at all')) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(3, 8), match='class'>
<re.Match object; span=(3, 8), match='class'>
>>> print(p.search('the declassified algorithm'))
None
>>> print(p.search('one subclass is'))
......@@ -744,7 +744,7 @@ given location, they can obviously be matched an infinite number of times.
>>> print(p.search('no class at all'))
None
>>> print(p.search('\b' + 'class' + '\b')) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(0, 7), match='\x08class\x08'>
<re.Match object; span=(0, 7), match='\x08class\x08'>
Second, inside a character class, where there's no use for this assertion,
``\b`` represents the backspace character, for compatibility with Python's
......
......@@ -86,7 +86,7 @@ patterns.
'(?s:.*\\.txt)\\Z'
>>> reobj = re.compile(regex)
>>> reobj.match('foobar.txt')
<_sre.SRE_Match object; span=(0, 10), match='foobar.txt'>
<re.Match object; span=(0, 10), match='foobar.txt'>
.. seealso::
......
This diff is collapsed.
......@@ -74,7 +74,7 @@ class Get_signatureTest(unittest.TestCase):
non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable;
if a string, backslash escapes in it are processed. If it is
a callable, it's passed the match object and must return''')
a callable, it's passed the Match object and must return''')
gtest(p.sub, '''(repl, string, count=0)\nReturn the string obtained by replacing the leftmost non-overlapping occurrences o...''')
def test_signature_wrap(self):
......
......@@ -92,8 +92,8 @@ This module exports the following functions:
subn Same as sub, but also return the number of substitutions made.
split Split a string by the occurrences of a pattern.
findall Find all occurrences of a pattern in a string.
finditer Return an iterator yielding a match object for each match.
compile Compile a pattern into a RegexObject.
finditer Return an iterator yielding a Match object for each match.
compile Compile a pattern into a Pattern object.
purge Clear the regular expression cache.
escape Backslash all non-alphanumerics in a string.
......@@ -139,7 +139,7 @@ except ImportError:
__all__ = [
"match", "fullmatch", "search", "sub", "subn", "split",
"findall", "finditer", "compile", "purge", "template", "escape",
"error", "A", "I", "L", "M", "S", "X", "U",
"error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U",
"ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
"UNICODE",
]
......@@ -175,17 +175,17 @@ error = sre_compile.error
def match(pattern, string, flags=0):
"""Try to apply the pattern at the start of the string, returning
a match object, or None if no match was found."""
a Match object, or None if no match was found."""
return _compile(pattern, flags).match(string)
def fullmatch(pattern, string, flags=0):
"""Try to apply the pattern to all of the string, returning
a match object, or None if no match was found."""
a Match object, or None if no match was found."""
return _compile(pattern, flags).fullmatch(string)
def search(pattern, string, flags=0):
"""Scan through string looking for a match to the pattern, returning
a match object, or None if no match was found."""
a Match object, or None if no match was found."""
return _compile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0, flags=0):
......@@ -193,7 +193,7 @@ def sub(pattern, repl, string, count=0, flags=0):
non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable;
if a string, backslash escapes in it are processed. If it is
a callable, it's passed the match object and must return
a callable, it's passed the Match object and must return
a replacement string to be used."""
return _compile(pattern, flags).sub(repl, string, count)
......@@ -204,7 +204,7 @@ def subn(pattern, repl, string, count=0, flags=0):
string by the replacement repl. number is the number of
substitutions that were made. repl can be either a string or a
callable; if a string, backslash escapes in it are processed.
If it is a callable, it's passed the match object and must
If it is a callable, it's passed the Match object and must
return a replacement string to be used."""
return _compile(pattern, flags).subn(repl, string, count)
......@@ -230,13 +230,13 @@ def findall(pattern, string, flags=0):
def finditer(pattern, string, flags=0):
"""Return an iterator over all non-overlapping matches in the
string. For each match, the iterator returns a match object.
string. For each match, the iterator returns a Match object.
Empty matches are included in the result."""
return _compile(pattern, flags).finditer(string)
def compile(pattern, flags=0):
"Compile a regular expression pattern, returning a pattern object."
"Compile a regular expression pattern, returning a Pattern object."
return _compile(pattern, flags)
def purge():
......@@ -245,7 +245,7 @@ def purge():
_compile_repl.cache_clear()
def template(pattern, flags=0):
"Compile a template pattern, returning a pattern object"
"Compile a template pattern, returning a Pattern object"
return _compile(pattern, flags|T)
# SPECIAL_CHARS
......@@ -264,13 +264,14 @@ def escape(pattern):
pattern = str(pattern, 'latin1')
return pattern.translate(_special_chars_map).encode('latin1')
Pattern = type(sre_compile.compile('', 0))
Match = type(sre_compile.compile('', 0).match(''))
# --------------------------------------------------------------------
# internals
_cache = OrderedDict()
_pattern_type = type(sre_compile.compile("", 0))
_MAXCACHE = 512
def _compile(pattern, flags):
# internal: compile pattern
......@@ -278,7 +279,7 @@ def _compile(pattern, flags):
return _cache[type(pattern), pattern, flags]
except KeyError:
pass
if isinstance(pattern, _pattern_type):
if isinstance(pattern, Pattern):
if flags:
raise ValueError(
"cannot process flags argument with a compiled pattern")
......@@ -301,12 +302,12 @@ def _compile_repl(repl, pattern):
return sre_parse.parse_template(repl, pattern)
def _expand(pattern, match, template):
# internal: match.expand implementation hook
# internal: Match.expand implementation hook
template = sre_parse.parse_template(template, pattern)
return sre_parse.expand_template(template, match)
def _subx(pattern, template):
# internal: pattern.sub/subn implementation helper
# internal: Pattern.sub/subn implementation helper
template = _compile_repl(template, pattern)
if not template[0] and len(template[1]) == 1:
# literal replacement
......@@ -322,7 +323,7 @@ import copyreg
def _pickle(p):
return _compile, (p.pattern, p.flags)
copyreg.pickle(_pattern_type, _pickle, _compile)
copyreg.pickle(Pattern, _pickle, _compile)
# --------------------------------------------------------------------
# experimental stuff (see python-dev discussions for details)
......
......@@ -32,6 +32,8 @@ class error(Exception):
colno: The column corresponding to pos (may be None)
"""
__module__ = 're'
def __init__(self, msg, pattern=None, pos=None):
self.msg = msg
self.pattern = pattern
......
......@@ -585,12 +585,12 @@ class Telnet:
"""Read until one from a list of regular expressions matches.
The first argument is a list of regular expressions, either
compiled (re.RegexObject instances) or uncompiled (strings).
compiled (re.Pattern instances) or uncompiled (strings).
The optional second argument is a timeout, in seconds; default
is no timeout.
Return a tuple of three items: the index in the list of the
first regular expression that matches; the match object
first regular expression that matches; the re.Match object
returned; and the text read up till and including the match.
If EOF is read and no text was read, raise EOFError.
......
......@@ -24,8 +24,6 @@ from optparse import make_option, Option, \
from optparse import _match_abbrev
from optparse import _parse_num
retype = type(re.compile(''))
class InterceptedError(Exception):
def __init__(self,
error_message=None,
......@@ -107,7 +105,7 @@ Args were %(args)s.""" % locals ())
func(*args, **kwargs)
except expected_exception as err:
actual_message = str(err)
if isinstance(expected_message, retype):
if isinstance(expected_message, re.Pattern):
self.assertTrue(expected_message.search(actual_message),
"""\
expected exception message pattern:
......
......@@ -1596,9 +1596,9 @@ class ReTests(unittest.TestCase):
def test_compile(self):
# Test return value when given string and pattern as parameter
pattern = re.compile('random pattern')
self.assertIsInstance(pattern, re._pattern_type)
self.assertIsInstance(pattern, re.Pattern)
same_pattern = re.compile(pattern)
self.assertIsInstance(same_pattern, re._pattern_type)
self.assertIsInstance(same_pattern, re.Pattern)
self.assertIs(same_pattern, pattern)
# Test behaviour when not given a string or pattern as parameter
self.assertRaises(TypeError, re.compile, 0)
......
......@@ -1273,7 +1273,7 @@ class TestCase(object):
Args:
expected_exception: Exception class expected to be raised.
expected_regex: Regex (re pattern object or string) expected
expected_regex: Regex (re.Pattern object or string) expected
to be found in error message.
args: Function to be called and extra positional args.
kwargs: Extra kwargs.
......@@ -1292,7 +1292,7 @@ class TestCase(object):
Args:
expected_warning: Warning class expected to be triggered.
expected_regex: Regex (re pattern object or string) expected
expected_regex: Regex (re.Pattern object or string) expected
to be found in error message.
args: Function to be called and extra positional args.
kwargs: Extra kwargs.
......
The types of compiled regular expression objects and match objects are now exposed as
`re.Pattern` and `re.Match`. This adds information in pydoc output for the
re module.
......@@ -630,13 +630,13 @@ _sre.SRE_Pattern.fullmatch
pos: Py_ssize_t = 0
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Matches against all of the string
Matches against all of the string.
[clinic start generated code]*/
static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Py_ssize_t pos, Py_ssize_t endpos)
/*[clinic end generated code: output=5833c47782a35f4a input=a6f640614aaefceb]*/
/*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
{
SRE_STATE state;
Py_ssize_t status;
......@@ -1341,7 +1341,7 @@ done:
return result;
}
PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
/* PatternObject's 'groupindex' method. */
static PyObject *
......@@ -2221,12 +2221,12 @@ _sre.SRE_Match.span
group: object(c_default="NULL") = 0
/
For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
For match object m, return the 2-tuple (m.start(group), m.end(group)).
[clinic start generated code]*/
static PyObject *
_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
{
Py_ssize_t index = match_getindex(self, group);
......@@ -2625,15 +2625,18 @@ static PyGetSetDef pattern_getset[] = {
#define PAT_OFF(x) offsetof(PatternObject, x)
static PyMemberDef pattern_members[] = {
{"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
{"flags", T_INT, PAT_OFF(flags), READONLY},
{"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
{"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
"The pattern string from which the RE object was compiled."},
{"flags", T_INT, PAT_OFF(flags), READONLY,
"The regex matching flags."},
{"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
"The number of capturing groups in the pattern."},
{NULL} /* Sentinel */
};
static PyTypeObject Pattern_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_" SRE_MODULE ".SRE_Pattern",
"re.Pattern",
sizeof(PatternObject), sizeof(SRE_CODE),
(destructor)pattern_dealloc, /* tp_dealloc */
0, /* tp_print */
......@@ -2685,18 +2688,24 @@ static PyMethodDef match_methods[] = {
};
static PyGetSetDef match_getset[] = {
{"lastindex", (getter)match_lastindex_get, (setter)NULL},
{"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
{"lastindex", (getter)match_lastindex_get, (setter)NULL,
"The integer index of the last matched capturing group."},
{"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
"The name of the last matched capturing group."},
{"regs", (getter)match_regs_get, (setter)NULL},
{NULL}
};
#define MATCH_OFF(x) offsetof(MatchObject, x)
static PyMemberDef match_members[] = {
{"string", T_OBJECT, MATCH_OFF(string), READONLY},
{"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
{"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
{"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
{"string", T_OBJECT, MATCH_OFF(string), READONLY,
"The string passed to match() or search()."},
{"re", T_OBJECT, MATCH_OFF(pattern), READONLY,
"The regular expression object."},
{"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
"The index into the string at which the RE engine started looking for a match."},
{"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
"The index into the string beyond which the RE engine will not go."},
{NULL}
};
......@@ -2705,7 +2714,7 @@ static PyMemberDef match_members[] = {
static PyTypeObject Match_Type = {
PyVarObject_HEAD_INIT(NULL,0)
"_" SRE_MODULE ".SRE_Match",
"re.Match",
sizeof(MatchObject), sizeof(Py_ssize_t),
(destructor)match_dealloc, /* tp_dealloc */
0, /* tp_print */
......
......@@ -190,7 +190,7 @@ PyDoc_STRVAR(_sre_SRE_Pattern_fullmatch__doc__,
"fullmatch($self, /, string, pos=0, endpos=sys.maxsize)\n"
"--\n"
"\n"
"Matches against all of the string");
"Matches against all of the string.");
#define _SRE_SRE_PATTERN_FULLMATCH_METHODDEF \
{"fullmatch", (PyCFunction)_sre_SRE_Pattern_fullmatch, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_fullmatch__doc__},
......@@ -682,7 +682,7 @@ PyDoc_STRVAR(_sre_SRE_Match_span__doc__,
"span($self, group=0, /)\n"
"--\n"
"\n"
"For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
"For match object m, return the 2-tuple (m.start(group), m.end(group)).");
#define _SRE_SRE_MATCH_SPAN_METHODDEF \
{"span", (PyCFunction)_sre_SRE_Match_span, METH_FASTCALL, _sre_SRE_Match_span__doc__},
......@@ -765,4 +765,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
{
return _sre_SRE_Scanner_search_impl(self);
}
/*[clinic end generated code: output=6e3fb17fef1be436 input=a9049054013a1b77]*/
/*[clinic end generated code: output=1e6a1be31302df09 input=a9049054013a1b77]*/
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment